program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { tensor var_935_axes_0 = const()[name = string("op_935_axes_0"), val = tensor([0])]; tensor var_935 = expand_dims(axes = var_935_axes_0, x = cache_length)[name = string("op_935")]; tensor var_956_axes_0 = const()[name = string("op_956_axes_0"), val = tensor([-1])]; string position_ids_to_fp16_dtype_0 = const()[name = string("position_ids_to_fp16_dtype_0"), val = string("fp16")]; tensor var_935_to_fp16 = cast(dtype = position_ids_to_fp16_dtype_0, x = var_935)[name = string("cast_0")]; tensor var_956_cast_fp16 = expand_dims(axes = var_956_axes_0, x = var_935_to_fp16)[name = string("op_956_cast_fp16")]; bool var_957_transpose_x_0 = const()[name = string("op_957_transpose_x_0"), val = bool(false)]; bool var_957_transpose_y_0 = const()[name = string("op_957_transpose_y_0"), val = bool(false)]; tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_957_cast_fp16 = matmul(transpose_x = var_957_transpose_x_0, transpose_y = var_957_transpose_y_0, x = const_0_to_fp16, y = var_956_cast_fp16)[name = string("op_957_cast_fp16")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_957_cast_fp16)[name = string("transpose_112")]; tensor emb_cast_fp16 = concat(axis = var_962, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; tensor var_964_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_964_cast_fp16")]; tensor var_972_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_972_cast_fp16")]; tensor var_989_begin_0 = const()[name = string("op_989_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_989_end_0 = const()[name = string("op_989_end_0"), val = tensor([1, 1024, 1, 256])]; tensor var_989_end_mask_0 = const()[name = string("op_989_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_989_cast_fp16 = slice_by_index(begin = var_989_begin_0, end = var_989_end_0, end_mask = var_989_end_mask_0, x = key_cache)[name = string("op_989_cast_fp16")]; tensor var_1009_begin_0 = const()[name = string("op_1009_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1009_end_0 = const()[name = string("op_1009_end_0"), val = tensor([1, 1024, 1, 256])]; tensor var_1009_end_mask_0 = const()[name = string("op_1009_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1009_cast_fp16 = slice_by_index(begin = var_1009_begin_0, end = var_1009_end_0, end_mask = var_1009_end_mask_0, x = value_cache)[name = string("op_1009_cast_fp16")]; tensor var_1021_axes_0 = const()[name = string("op_1021_axes_0"), val = tensor([-1])]; tensor var_1021_cast_fp16 = squeeze(axes = var_1021_axes_0, x = input_embeds)[name = string("op_1021_cast_fp16")]; tensor var_1023_axes_0 = const()[name = string("op_1023_axes_0"), val = tensor([-1])]; tensor var_1023_cast_fp16 = squeeze(axes = var_1023_axes_0, x = var_1021_cast_fp16)[name = string("op_1023_cast_fp16")]; tensor hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor([0])]; tensor hidden_states_1_cast_fp16 = expand_dims(axes = hidden_states_1_axes_0, x = var_1023_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; fp16 var_1029_promoted_to_fp16 = const()[name = string("op_1029_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1035_cast_fp16 = pow(x = hidden_states_1_cast_fp16, y = var_1029_promoted_to_fp16)[name = string("op_1035_cast_fp16")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_1035_cast_fp16)[name = string("variance_1_cast_fp16")]; fp16 var_1038_to_fp16 = const()[name = string("op_1038_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1039_cast_fp16 = add(x = variance_1_cast_fp16, y = var_1038_to_fp16)[name = string("op_1039_cast_fp16")]; fp32 var_1040_epsilon_0 = const()[name = string("op_1040_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1040_cast_fp16 = rsqrt(epsilon = var_1040_epsilon_0, x = var_1039_cast_fp16)[name = string("op_1040_cast_fp16")]; tensor hidden_states_5_cast_fp16 = mul(x = hidden_states_1_cast_fp16, y = var_1040_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor const_1_to_fp16 = const()[name = string("const_1_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256)))]; tensor input_1_cast_fp16 = mul(x = const_1_to_fp16, y = hidden_states_5_cast_fp16)[name = string("input_1_cast_fp16")]; tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099584))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2100160)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2104320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3152960))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3153536)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3155648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204288))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_1057 = const()[name = string("op_1057"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_7_cast_fp16 = reshape(shape = var_1057, x = linear_0_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor var_1063 = const()[name = string("op_1063"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_13_cast_fp16 = reshape(shape = var_1063, x = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor var_1069 = const()[name = string("op_1069"), val = tensor([1, 1, 8, 128])]; tensor v_3_cast_fp16 = reshape(shape = var_1069, x = linear_2_cast_fp16)[name = string("v_3_cast_fp16")]; fp16 var_1074_promoted_to_fp16 = const()[name = string("op_1074_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1080_cast_fp16 = pow(x = hidden_states_7_cast_fp16, y = var_1074_promoted_to_fp16)[name = string("op_1080_cast_fp16")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_1080_cast_fp16)[name = string("variance_3_cast_fp16")]; fp16 var_1083_to_fp16 = const()[name = string("op_1083_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1084_cast_fp16 = add(x = variance_3_cast_fp16, y = var_1083_to_fp16)[name = string("op_1084_cast_fp16")]; fp32 var_1085_epsilon_0 = const()[name = string("op_1085_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1085_cast_fp16 = rsqrt(epsilon = var_1085_epsilon_0, x = var_1084_cast_fp16)[name = string("op_1085_cast_fp16")]; tensor hidden_states_11_cast_fp16 = mul(x = hidden_states_7_cast_fp16, y = var_1085_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204864)))]; tensor q_3_cast_fp16 = mul(x = const_2_to_fp16, y = hidden_states_11_cast_fp16)[name = string("q_3_cast_fp16")]; fp16 var_1092_promoted_to_fp16 = const()[name = string("op_1092_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1098_cast_fp16 = pow(x = hidden_states_13_cast_fp16, y = var_1092_promoted_to_fp16)[name = string("op_1098_cast_fp16")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_1098_cast_fp16)[name = string("variance_5_cast_fp16")]; fp16 var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1102_cast_fp16 = add(x = variance_5_cast_fp16, y = var_1101_to_fp16)[name = string("op_1102_cast_fp16")]; fp32 var_1103_epsilon_0 = const()[name = string("op_1103_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1103_cast_fp16 = rsqrt(epsilon = var_1103_epsilon_0, x = var_1102_cast_fp16)[name = string("op_1103_cast_fp16")]; tensor hidden_states_17_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = var_1103_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor const_3_to_fp16 = const()[name = string("const_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205184)))]; tensor k_3_cast_fp16 = mul(x = const_3_to_fp16, y = hidden_states_17_cast_fp16)[name = string("k_3_cast_fp16")]; tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_5_perm_0 = const()[name = string("v_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_3_axes_0 = const()[name = string("cos_3_axes_0"), val = tensor([1])]; tensor cos_3_cast_fp16 = expand_dims(axes = cos_3_axes_0, x = var_964_cast_fp16)[name = string("cos_3_cast_fp16")]; tensor sin_3_axes_0 = const()[name = string("sin_3_axes_0"), val = tensor([1])]; tensor sin_3_cast_fp16 = expand_dims(axes = sin_3_axes_0, x = var_972_cast_fp16)[name = string("sin_3_cast_fp16")]; tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = q_3_cast_fp16)[name = string("transpose_111")]; tensor var_1120_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_3_cast_fp16)[name = string("op_1120_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_5_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_5_cast_fp16)[name = string("x2_1_cast_fp16")]; fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1141_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_1141_cast_fp16")]; int32 var_1143 = const()[name = string("op_1143"), val = int32(-1)]; bool var_1144_interleave_0 = const()[name = string("op_1144_interleave_0"), val = bool(false)]; tensor var_1144_cast_fp16 = concat(axis = var_1143, interleave = var_1144_interleave_0, values = (var_1141_cast_fp16, x1_1_cast_fp16))[name = string("op_1144_cast_fp16")]; tensor var_1145_cast_fp16 = mul(x = var_1144_cast_fp16, y = sin_3_cast_fp16)[name = string("op_1145_cast_fp16")]; tensor q_7_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1145_cast_fp16)[name = string("q_7_cast_fp16")]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = k_3_cast_fp16)[name = string("transpose_110")]; tensor var_1148_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_3_cast_fp16)[name = string("op_1148_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_5_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_5_cast_fp16)[name = string("x2_3_cast_fp16")]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1169_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_1169_cast_fp16")]; int32 var_1171 = const()[name = string("op_1171"), val = int32(-1)]; bool var_1172_interleave_0 = const()[name = string("op_1172_interleave_0"), val = bool(false)]; tensor var_1172_cast_fp16 = concat(axis = var_1171, interleave = var_1172_interleave_0, values = (var_1169_cast_fp16, x1_3_cast_fp16))[name = string("op_1172_cast_fp16")]; tensor var_1173_cast_fp16 = mul(x = var_1172_cast_fp16, y = sin_3_cast_fp16)[name = string("op_1173_cast_fp16")]; tensor k_7_cast_fp16 = add(x = var_1148_cast_fp16, y = var_1173_cast_fp16)[name = string("k_7_cast_fp16")]; tensor var_1180 = const()[name = string("op_1180"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_1_cast_fp16 = reshape(shape = var_1180, x = k_7_cast_fp16)[name = string("nk_flat_1_cast_fp16")]; tensor var_1186 = const()[name = string("op_1186"), val = tensor([1, 1024, 1, 1])]; tensor v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = v_3_cast_fp16)[name = string("transpose_109")]; tensor nv_flat_1_cast_fp16 = reshape(shape = var_1186, x = v_5_cast_fp16)[name = string("nv_flat_1_cast_fp16")]; tensor var_1189_axes_0 = const()[name = string("op_1189_axes_0"), val = tensor([1])]; tensor var_1189_cast_fp16 = expand_dims(axes = var_1189_axes_0, x = kv_cache_update_mask)[name = string("op_1189_cast_fp16")]; tensor update_mask_1_axes_0 = const()[name = string("update_mask_1_axes_0"), val = tensor([2])]; tensor update_mask_1_cast_fp16 = expand_dims(axes = update_mask_1_axes_0, x = var_1189_cast_fp16)[name = string("update_mask_1_cast_fp16")]; fp16 var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = fp16(0x1p+0)]; tensor var_1194_cast_fp16 = sub(x = var_1192_to_fp16, y = update_mask_1_cast_fp16)[name = string("op_1194_cast_fp16")]; tensor var_1195_cast_fp16 = mul(x = var_989_cast_fp16, y = var_1194_cast_fp16)[name = string("op_1195_cast_fp16")]; tensor var_1196_cast_fp16 = mul(x = nk_flat_1_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_1196_cast_fp16")]; tensor key_cache_5_cast_fp16 = add(x = var_1195_cast_fp16, y = var_1196_cast_fp16)[name = string("key_cache_5_cast_fp16")]; tensor var_1202_cast_fp16 = mul(x = var_1009_cast_fp16, y = var_1194_cast_fp16)[name = string("op_1202_cast_fp16")]; tensor var_1203_cast_fp16 = mul(x = nv_flat_1_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_1203_cast_fp16")]; tensor value_cache_5_cast_fp16 = add(x = var_1202_cast_fp16, y = var_1203_cast_fp16)[name = string("value_cache_5_cast_fp16")]; tensor kc_1_axes_0 = const()[name = string("kc_1_axes_0"), val = tensor([2])]; tensor kc_1_cast_fp16 = squeeze(axes = kc_1_axes_0, x = key_cache_5_cast_fp16)[name = string("kc_1_cast_fp16")]; tensor var_1212 = const()[name = string("op_1212"), val = tensor([1, 8, 128, 256])]; tensor kc_3_cast_fp16 = reshape(shape = var_1212, x = kc_1_cast_fp16)[name = string("kc_3_cast_fp16")]; tensor vc_1_axes_0 = const()[name = string("vc_1_axes_0"), val = tensor([2])]; tensor vc_1_cast_fp16 = squeeze(axes = vc_1_axes_0, x = value_cache_5_cast_fp16)[name = string("vc_1_cast_fp16")]; tensor var_1220 = const()[name = string("op_1220"), val = tensor([1, 8, 128, 256])]; tensor vc_3_cast_fp16 = reshape(shape = var_1220, x = vc_1_cast_fp16)[name = string("vc_3_cast_fp16")]; tensor var_1223_axes_0 = const()[name = string("op_1223_axes_0"), val = tensor([2])]; tensor var_1223_cast_fp16 = expand_dims(axes = var_1223_axes_0, x = kc_3_cast_fp16)[name = string("op_1223_cast_fp16")]; tensor var_1231_reps_0 = const()[name = string("op_1231_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1231_cast_fp16 = tile(reps = var_1231_reps_0, x = var_1223_cast_fp16)[name = string("op_1231_cast_fp16")]; tensor var_1236 = const()[name = string("op_1236"), val = tensor([1, 16, 128, 256])]; tensor kc_5_cast_fp16 = reshape(shape = var_1236, x = var_1231_cast_fp16)[name = string("kc_5_cast_fp16")]; tensor var_1239_axes_0 = const()[name = string("op_1239_axes_0"), val = tensor([2])]; tensor var_1239_cast_fp16 = expand_dims(axes = var_1239_axes_0, x = vc_3_cast_fp16)[name = string("op_1239_cast_fp16")]; tensor var_1247_reps_0 = const()[name = string("op_1247_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1247_cast_fp16 = tile(reps = var_1247_reps_0, x = var_1239_cast_fp16)[name = string("op_1247_cast_fp16")]; tensor var_1252 = const()[name = string("op_1252"), val = tensor([1, 16, 128, 256])]; tensor vc_5_cast_fp16 = reshape(shape = var_1252, x = var_1247_cast_fp16)[name = string("vc_5_cast_fp16")]; bool var_1254_transpose_x_0 = const()[name = string("op_1254_transpose_x_0"), val = bool(false)]; bool var_1254_transpose_y_0 = const()[name = string("op_1254_transpose_y_0"), val = bool(false)]; tensor var_1254_cast_fp16 = matmul(transpose_x = var_1254_transpose_x_0, transpose_y = var_1254_transpose_y_0, x = q_7_cast_fp16, y = kc_5_cast_fp16)[name = string("op_1254_cast_fp16")]; fp16 _inversed_attn_weights_1_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_1_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_1_cast_fp16 = mul(x = var_1254_cast_fp16, y = _inversed_attn_weights_1_y_0_to_fp16)[name = string("_inversed_attn_weights_1_cast_fp16")]; tensor var_1258_axes_0 = const()[name = string("op_1258_axes_0"), val = tensor([1])]; tensor var_1258_cast_fp16 = expand_dims(axes = var_1258_axes_0, x = key_padding_mask)[name = string("op_1258_cast_fp16")]; tensor mask_1_axes_0 = const()[name = string("mask_1_axes_0"), val = tensor([2])]; tensor mask_1_cast_fp16 = expand_dims(axes = mask_1_axes_0, x = var_1258_cast_fp16)[name = string("mask_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = _inversed_attn_weights_1_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; int32 var_1268 = const()[name = string("op_1268"), val = int32(-1)]; tensor attn_weights_7_cast_fp16 = softmax(axis = var_1268, x = attn_weights_3_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(false)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(true)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_7_cast_fp16, y = vc_5_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_1277_perm_0 = const()[name = string("op_1277_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1281 = const()[name = string("op_1281"), val = tensor([1, 1, -1])]; tensor var_1277_cast_fp16 = transpose(perm = var_1277_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_108")]; tensor input_3_cast_fp16 = reshape(shape = var_1281, x = var_1277_cast_fp16)[name = string("input_3_cast_fp16")]; tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6302720))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor var_1287_axes_0 = const()[name = string("op_1287_axes_0"), val = tensor([0])]; tensor var_1287_cast_fp16 = squeeze(axes = var_1287_axes_0, x = linear_3_cast_fp16)[name = string("op_1287_cast_fp16")]; tensor var_1289_axes_0 = const()[name = string("op_1289_axes_0"), val = tensor([0])]; tensor var_1289_cast_fp16 = squeeze(axes = var_1289_axes_0, x = var_1287_cast_fp16)[name = string("op_1289_cast_fp16")]; tensor var_1291_axes_0 = const()[name = string("op_1291_axes_0"), val = tensor([-1])]; tensor var_1291_cast_fp16 = expand_dims(axes = var_1291_axes_0, x = var_1289_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor attn_4d_1_axes_0 = const()[name = string("attn_4d_1_axes_0"), val = tensor([-1])]; tensor attn_4d_1_cast_fp16 = expand_dims(axes = attn_4d_1_axes_0, x = var_1291_cast_fp16)[name = string("attn_4d_1_cast_fp16")]; tensor hidden_1_cast_fp16 = add(x = input_embeds, y = attn_4d_1_cast_fp16)[name = string("hidden_1_cast_fp16")]; tensor var_1297_axes_0 = const()[name = string("op_1297_axes_0"), val = tensor([-1])]; tensor var_1297_cast_fp16 = squeeze(axes = var_1297_axes_0, x = hidden_1_cast_fp16)[name = string("op_1297_cast_fp16")]; tensor var_1299_axes_0 = const()[name = string("op_1299_axes_0"), val = tensor([-1])]; tensor var_1299_cast_fp16 = squeeze(axes = var_1299_axes_0, x = var_1297_cast_fp16)[name = string("op_1299_cast_fp16")]; tensor hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor([0])]; tensor hidden_states_19_cast_fp16 = expand_dims(axes = hidden_states_19_axes_0, x = var_1299_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; fp16 var_1305_promoted_to_fp16 = const()[name = string("op_1305_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1311_cast_fp16 = pow(x = hidden_states_19_cast_fp16, y = var_1305_promoted_to_fp16)[name = string("op_1311_cast_fp16")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_1311_cast_fp16)[name = string("variance_7_cast_fp16")]; fp16 var_1314_to_fp16 = const()[name = string("op_1314_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1315_cast_fp16 = add(x = variance_7_cast_fp16, y = var_1314_to_fp16)[name = string("op_1315_cast_fp16")]; fp32 var_1316_epsilon_0 = const()[name = string("op_1316_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1316_cast_fp16 = rsqrt(epsilon = var_1316_epsilon_0, x = var_1315_cast_fp16)[name = string("op_1316_cast_fp16")]; tensor hidden_states_23_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = var_1316_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor const_10_to_fp16 = const()[name = string("const_10_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6303296)))]; tensor input_5_cast_fp16 = mul(x = const_10_to_fp16, y = hidden_states_23_cast_fp16)[name = string("input_5_cast_fp16")]; tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6305408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451200))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451776)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_1326_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_1326_cast_fp16")]; tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9457984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12603776))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_9_cast_fp16 = mul(x = var_1326_cast_fp16, y = linear_5_cast_fp16)[name = string("input_9_cast_fp16")]; tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12604352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750144))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor var_1333_axes_0 = const()[name = string("op_1333_axes_0"), val = tensor([0])]; tensor var_1333_cast_fp16 = squeeze(axes = var_1333_axes_0, x = linear_6_cast_fp16)[name = string("op_1333_cast_fp16")]; tensor var_1335_axes_0 = const()[name = string("op_1335_axes_0"), val = tensor([0])]; tensor var_1335_cast_fp16 = squeeze(axes = var_1335_axes_0, x = var_1333_cast_fp16)[name = string("op_1335_cast_fp16")]; tensor var_1337_axes_0 = const()[name = string("op_1337_axes_0"), val = tensor([-1])]; tensor var_1337_cast_fp16 = expand_dims(axes = var_1337_axes_0, x = var_1335_cast_fp16)[name = string("op_1337_cast_fp16")]; tensor mlp_4d_1_axes_0 = const()[name = string("mlp_4d_1_axes_0"), val = tensor([-1])]; tensor mlp_4d_1_cast_fp16 = expand_dims(axes = mlp_4d_1_axes_0, x = var_1337_cast_fp16)[name = string("mlp_4d_1_cast_fp16")]; tensor hidden_3_cast_fp16 = add(x = hidden_1_cast_fp16, y = mlp_4d_1_cast_fp16)[name = string("hidden_3_cast_fp16")]; tensor var_1351_begin_0 = const()[name = string("op_1351_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_1351_end_0 = const()[name = string("op_1351_end_0"), val = tensor([1, 2048, 1, 256])]; tensor var_1351_end_mask_0 = const()[name = string("op_1351_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1351_cast_fp16 = slice_by_index(begin = var_1351_begin_0, end = var_1351_end_0, end_mask = var_1351_end_mask_0, x = key_cache)[name = string("op_1351_cast_fp16")]; tensor var_1371_begin_0 = const()[name = string("op_1371_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_1371_end_0 = const()[name = string("op_1371_end_0"), val = tensor([1, 2048, 1, 256])]; tensor var_1371_end_mask_0 = const()[name = string("op_1371_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1371_cast_fp16 = slice_by_index(begin = var_1371_begin_0, end = var_1371_end_0, end_mask = var_1371_end_mask_0, x = value_cache)[name = string("op_1371_cast_fp16")]; tensor var_1383_axes_0 = const()[name = string("op_1383_axes_0"), val = tensor([-1])]; tensor var_1383_cast_fp16 = squeeze(axes = var_1383_axes_0, x = hidden_3_cast_fp16)[name = string("op_1383_cast_fp16")]; tensor var_1385_axes_0 = const()[name = string("op_1385_axes_0"), val = tensor([-1])]; tensor var_1385_cast_fp16 = squeeze(axes = var_1385_axes_0, x = var_1383_cast_fp16)[name = string("op_1385_cast_fp16")]; tensor hidden_states_25_axes_0 = const()[name = string("hidden_states_25_axes_0"), val = tensor([0])]; tensor hidden_states_25_cast_fp16 = expand_dims(axes = hidden_states_25_axes_0, x = var_1385_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; fp16 var_1391_promoted_to_fp16 = const()[name = string("op_1391_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1397_cast_fp16 = pow(x = hidden_states_25_cast_fp16, y = var_1391_promoted_to_fp16)[name = string("op_1397_cast_fp16")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_1397_cast_fp16)[name = string("variance_9_cast_fp16")]; fp16 var_1400_to_fp16 = const()[name = string("op_1400_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1401_cast_fp16 = add(x = variance_9_cast_fp16, y = var_1400_to_fp16)[name = string("op_1401_cast_fp16")]; fp32 var_1402_epsilon_0 = const()[name = string("op_1402_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1402_cast_fp16 = rsqrt(epsilon = var_1402_epsilon_0, x = var_1401_cast_fp16)[name = string("op_1402_cast_fp16")]; tensor hidden_states_29_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = var_1402_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor const_11_to_fp16 = const()[name = string("const_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750720)))]; tensor input_11_cast_fp16 = mul(x = const_11_to_fp16, y = hidden_states_29_cast_fp16)[name = string("input_11_cast_fp16")]; tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850048))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899264))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19948480))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_1419 = const()[name = string("op_1419"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_31_cast_fp16 = reshape(shape = var_1419, x = linear_7_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; tensor var_1425 = const()[name = string("op_1425"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_37_cast_fp16 = reshape(shape = var_1425, x = linear_8_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor var_1431 = const()[name = string("op_1431"), val = tensor([1, 1, 8, 128])]; tensor v_9_cast_fp16 = reshape(shape = var_1431, x = linear_9_cast_fp16)[name = string("v_9_cast_fp16")]; fp16 var_1436_promoted_to_fp16 = const()[name = string("op_1436_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1442_cast_fp16 = pow(x = hidden_states_31_cast_fp16, y = var_1436_promoted_to_fp16)[name = string("op_1442_cast_fp16")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_1442_cast_fp16)[name = string("variance_11_cast_fp16")]; fp16 var_1445_to_fp16 = const()[name = string("op_1445_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1446_cast_fp16 = add(x = variance_11_cast_fp16, y = var_1445_to_fp16)[name = string("op_1446_cast_fp16")]; fp32 var_1447_epsilon_0 = const()[name = string("op_1447_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1447_cast_fp16 = rsqrt(epsilon = var_1447_epsilon_0, x = var_1446_cast_fp16)[name = string("op_1447_cast_fp16")]; tensor hidden_states_35_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = var_1447_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor const_12_to_fp16 = const()[name = string("const_12_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949056)))]; tensor q_11_cast_fp16 = mul(x = const_12_to_fp16, y = hidden_states_35_cast_fp16)[name = string("q_11_cast_fp16")]; fp16 var_1454_promoted_to_fp16 = const()[name = string("op_1454_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1460_cast_fp16 = pow(x = hidden_states_37_cast_fp16, y = var_1454_promoted_to_fp16)[name = string("op_1460_cast_fp16")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_1460_cast_fp16)[name = string("variance_13_cast_fp16")]; fp16 var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1464_cast_fp16 = add(x = variance_13_cast_fp16, y = var_1463_to_fp16)[name = string("op_1464_cast_fp16")]; fp32 var_1465_epsilon_0 = const()[name = string("op_1465_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1465_cast_fp16 = rsqrt(epsilon = var_1465_epsilon_0, x = var_1464_cast_fp16)[name = string("op_1465_cast_fp16")]; tensor hidden_states_41_cast_fp16 = mul(x = hidden_states_37_cast_fp16, y = var_1465_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor const_13_to_fp16 = const()[name = string("const_13_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949376)))]; tensor k_11_cast_fp16 = mul(x = const_13_to_fp16, y = hidden_states_41_cast_fp16)[name = string("k_11_cast_fp16")]; tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_11_perm_0 = const()[name = string("v_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = q_11_cast_fp16)[name = string("transpose_107")]; tensor var_1482_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_3_cast_fp16)[name = string("op_1482_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_13_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_13_cast_fp16)[name = string("x2_5_cast_fp16")]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1503_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1503_cast_fp16")]; int32 var_1505 = const()[name = string("op_1505"), val = int32(-1)]; bool var_1506_interleave_0 = const()[name = string("op_1506_interleave_0"), val = bool(false)]; tensor var_1506_cast_fp16 = concat(axis = var_1505, interleave = var_1506_interleave_0, values = (var_1503_cast_fp16, x1_5_cast_fp16))[name = string("op_1506_cast_fp16")]; tensor var_1507_cast_fp16 = mul(x = var_1506_cast_fp16, y = sin_3_cast_fp16)[name = string("op_1507_cast_fp16")]; tensor q_15_cast_fp16 = add(x = var_1482_cast_fp16, y = var_1507_cast_fp16)[name = string("q_15_cast_fp16")]; tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = k_11_cast_fp16)[name = string("transpose_106")]; tensor var_1510_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_3_cast_fp16)[name = string("op_1510_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_13_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_13_cast_fp16)[name = string("x2_7_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1531_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1531_cast_fp16")]; int32 var_1533 = const()[name = string("op_1533"), val = int32(-1)]; bool var_1534_interleave_0 = const()[name = string("op_1534_interleave_0"), val = bool(false)]; tensor var_1534_cast_fp16 = concat(axis = var_1533, interleave = var_1534_interleave_0, values = (var_1531_cast_fp16, x1_7_cast_fp16))[name = string("op_1534_cast_fp16")]; tensor var_1535_cast_fp16 = mul(x = var_1534_cast_fp16, y = sin_3_cast_fp16)[name = string("op_1535_cast_fp16")]; tensor k_15_cast_fp16 = add(x = var_1510_cast_fp16, y = var_1535_cast_fp16)[name = string("k_15_cast_fp16")]; tensor var_1542 = const()[name = string("op_1542"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_3_cast_fp16 = reshape(shape = var_1542, x = k_15_cast_fp16)[name = string("nk_flat_3_cast_fp16")]; tensor var_1548 = const()[name = string("op_1548"), val = tensor([1, 1024, 1, 1])]; tensor v_11_cast_fp16 = transpose(perm = v_11_perm_0, x = v_9_cast_fp16)[name = string("transpose_105")]; tensor nv_flat_3_cast_fp16 = reshape(shape = var_1548, x = v_11_cast_fp16)[name = string("nv_flat_3_cast_fp16")]; tensor var_1557_cast_fp16 = mul(x = var_1351_cast_fp16, y = var_1194_cast_fp16)[name = string("op_1557_cast_fp16")]; tensor var_1558_cast_fp16 = mul(x = nk_flat_3_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_1558_cast_fp16")]; tensor key_cache_9_cast_fp16 = add(x = var_1557_cast_fp16, y = var_1558_cast_fp16)[name = string("key_cache_9_cast_fp16")]; tensor var_1564_cast_fp16 = mul(x = var_1371_cast_fp16, y = var_1194_cast_fp16)[name = string("op_1564_cast_fp16")]; tensor var_1565_cast_fp16 = mul(x = nv_flat_3_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_1565_cast_fp16")]; tensor value_cache_9_cast_fp16 = add(x = var_1564_cast_fp16, y = var_1565_cast_fp16)[name = string("value_cache_9_cast_fp16")]; tensor kc_7_axes_0 = const()[name = string("kc_7_axes_0"), val = tensor([2])]; tensor kc_7_cast_fp16 = squeeze(axes = kc_7_axes_0, x = key_cache_9_cast_fp16)[name = string("kc_7_cast_fp16")]; tensor var_1574 = const()[name = string("op_1574"), val = tensor([1, 8, 128, 256])]; tensor kc_9_cast_fp16 = reshape(shape = var_1574, x = kc_7_cast_fp16)[name = string("kc_9_cast_fp16")]; tensor vc_7_axes_0 = const()[name = string("vc_7_axes_0"), val = tensor([2])]; tensor vc_7_cast_fp16 = squeeze(axes = vc_7_axes_0, x = value_cache_9_cast_fp16)[name = string("vc_7_cast_fp16")]; tensor var_1582 = const()[name = string("op_1582"), val = tensor([1, 8, 128, 256])]; tensor vc_9_cast_fp16 = reshape(shape = var_1582, x = vc_7_cast_fp16)[name = string("vc_9_cast_fp16")]; tensor var_1585_axes_0 = const()[name = string("op_1585_axes_0"), val = tensor([2])]; tensor var_1585_cast_fp16 = expand_dims(axes = var_1585_axes_0, x = kc_9_cast_fp16)[name = string("op_1585_cast_fp16")]; tensor var_1593_reps_0 = const()[name = string("op_1593_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1593_cast_fp16 = tile(reps = var_1593_reps_0, x = var_1585_cast_fp16)[name = string("op_1593_cast_fp16")]; tensor var_1598 = const()[name = string("op_1598"), val = tensor([1, 16, 128, 256])]; tensor kc_11_cast_fp16 = reshape(shape = var_1598, x = var_1593_cast_fp16)[name = string("kc_11_cast_fp16")]; tensor var_1601_axes_0 = const()[name = string("op_1601_axes_0"), val = tensor([2])]; tensor var_1601_cast_fp16 = expand_dims(axes = var_1601_axes_0, x = vc_9_cast_fp16)[name = string("op_1601_cast_fp16")]; tensor var_1609_reps_0 = const()[name = string("op_1609_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1609_cast_fp16 = tile(reps = var_1609_reps_0, x = var_1601_cast_fp16)[name = string("op_1609_cast_fp16")]; tensor var_1614 = const()[name = string("op_1614"), val = tensor([1, 16, 128, 256])]; tensor vc_11_cast_fp16 = reshape(shape = var_1614, x = var_1609_cast_fp16)[name = string("vc_11_cast_fp16")]; bool var_1616_transpose_x_0 = const()[name = string("op_1616_transpose_x_0"), val = bool(false)]; bool var_1616_transpose_y_0 = const()[name = string("op_1616_transpose_y_0"), val = bool(false)]; tensor var_1616_cast_fp16 = matmul(transpose_x = var_1616_transpose_x_0, transpose_y = var_1616_transpose_y_0, x = q_15_cast_fp16, y = kc_11_cast_fp16)[name = string("op_1616_cast_fp16")]; fp16 _inversed_attn_weights_9_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_9_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_9_cast_fp16 = mul(x = var_1616_cast_fp16, y = _inversed_attn_weights_9_y_0_to_fp16)[name = string("_inversed_attn_weights_9_cast_fp16")]; tensor attn_weights_11_cast_fp16 = add(x = _inversed_attn_weights_9_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; int32 var_1630 = const()[name = string("op_1630"), val = int32(-1)]; tensor attn_weights_15_cast_fp16 = softmax(axis = var_1630, x = attn_weights_11_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_5_transpose_x_1 = const()[name = string("attn_output_5_transpose_x_1"), val = bool(false)]; bool attn_output_5_transpose_y_1 = const()[name = string("attn_output_5_transpose_y_1"), val = bool(true)]; tensor attn_output_5_cast_fp16 = matmul(transpose_x = attn_output_5_transpose_x_1, transpose_y = attn_output_5_transpose_y_1, x = attn_weights_15_cast_fp16, y = vc_11_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_1639_perm_0 = const()[name = string("op_1639_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1643 = const()[name = string("op_1643"), val = tensor([1, 1, -1])]; tensor var_1639_cast_fp16 = transpose(perm = var_1639_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_104")]; tensor input_13_cast_fp16 = reshape(shape = var_1643, x = var_1639_cast_fp16)[name = string("input_13_cast_fp16")]; tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22046912))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_1649_axes_0 = const()[name = string("op_1649_axes_0"), val = tensor([0])]; tensor var_1649_cast_fp16 = squeeze(axes = var_1649_axes_0, x = linear_10_cast_fp16)[name = string("op_1649_cast_fp16")]; tensor var_1651_axes_0 = const()[name = string("op_1651_axes_0"), val = tensor([0])]; tensor var_1651_cast_fp16 = squeeze(axes = var_1651_axes_0, x = var_1649_cast_fp16)[name = string("op_1651_cast_fp16")]; tensor var_1653_axes_0 = const()[name = string("op_1653_axes_0"), val = tensor([-1])]; tensor var_1653_cast_fp16 = expand_dims(axes = var_1653_axes_0, x = var_1651_cast_fp16)[name = string("op_1653_cast_fp16")]; tensor attn_4d_3_axes_0 = const()[name = string("attn_4d_3_axes_0"), val = tensor([-1])]; tensor attn_4d_3_cast_fp16 = expand_dims(axes = attn_4d_3_axes_0, x = var_1653_cast_fp16)[name = string("attn_4d_3_cast_fp16")]; tensor hidden_5_cast_fp16 = add(x = hidden_3_cast_fp16, y = attn_4d_3_cast_fp16)[name = string("hidden_5_cast_fp16")]; tensor var_1659_axes_0 = const()[name = string("op_1659_axes_0"), val = tensor([-1])]; tensor var_1659_cast_fp16 = squeeze(axes = var_1659_axes_0, x = hidden_5_cast_fp16)[name = string("op_1659_cast_fp16")]; tensor var_1661_axes_0 = const()[name = string("op_1661_axes_0"), val = tensor([-1])]; tensor var_1661_cast_fp16 = squeeze(axes = var_1661_axes_0, x = var_1659_cast_fp16)[name = string("op_1661_cast_fp16")]; tensor hidden_states_43_axes_0 = const()[name = string("hidden_states_43_axes_0"), val = tensor([0])]; tensor hidden_states_43_cast_fp16 = expand_dims(axes = hidden_states_43_axes_0, x = var_1661_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; fp16 var_1667_promoted_to_fp16 = const()[name = string("op_1667_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1673_cast_fp16 = pow(x = hidden_states_43_cast_fp16, y = var_1667_promoted_to_fp16)[name = string("op_1673_cast_fp16")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_1673_cast_fp16)[name = string("variance_15_cast_fp16")]; fp16 var_1676_to_fp16 = const()[name = string("op_1676_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1677_cast_fp16 = add(x = variance_15_cast_fp16, y = var_1676_to_fp16)[name = string("op_1677_cast_fp16")]; fp32 var_1678_epsilon_0 = const()[name = string("op_1678_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1678_cast_fp16 = rsqrt(epsilon = var_1678_epsilon_0, x = var_1677_cast_fp16)[name = string("op_1678_cast_fp16")]; tensor hidden_states_47_cast_fp16 = mul(x = hidden_states_43_cast_fp16, y = var_1678_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor const_20_to_fp16 = const()[name = string("const_20_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22047488)))]; tensor input_15_cast_fp16 = mul(x = const_20_to_fp16, y = hidden_states_47_cast_fp16)[name = string("input_15_cast_fp16")]; tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22049600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195392))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_1688_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_1688_cast_fp16")]; tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28341760))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_19_cast_fp16 = mul(x = var_1688_cast_fp16, y = linear_12_cast_fp16)[name = string("input_19_cast_fp16")]; tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28342336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488128))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor var_1695_axes_0 = const()[name = string("op_1695_axes_0"), val = tensor([0])]; tensor var_1695_cast_fp16 = squeeze(axes = var_1695_axes_0, x = linear_13_cast_fp16)[name = string("op_1695_cast_fp16")]; tensor var_1697_axes_0 = const()[name = string("op_1697_axes_0"), val = tensor([0])]; tensor var_1697_cast_fp16 = squeeze(axes = var_1697_axes_0, x = var_1695_cast_fp16)[name = string("op_1697_cast_fp16")]; tensor var_1699_axes_0 = const()[name = string("op_1699_axes_0"), val = tensor([-1])]; tensor var_1699_cast_fp16 = expand_dims(axes = var_1699_axes_0, x = var_1697_cast_fp16)[name = string("op_1699_cast_fp16")]; tensor mlp_4d_3_axes_0 = const()[name = string("mlp_4d_3_axes_0"), val = tensor([-1])]; tensor mlp_4d_3_cast_fp16 = expand_dims(axes = mlp_4d_3_axes_0, x = var_1699_cast_fp16)[name = string("mlp_4d_3_cast_fp16")]; tensor hidden_7_cast_fp16 = add(x = hidden_5_cast_fp16, y = mlp_4d_3_cast_fp16)[name = string("hidden_7_cast_fp16")]; tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1713_end_0 = const()[name = string("op_1713_end_0"), val = tensor([1, 3072, 1, 256])]; tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = var_1713_end_0, end_mask = var_1713_end_mask_0, x = key_cache)[name = string("op_1713_cast_fp16")]; tensor var_1733_begin_0 = const()[name = string("op_1733_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1733_end_0 = const()[name = string("op_1733_end_0"), val = tensor([1, 3072, 1, 256])]; tensor var_1733_end_mask_0 = const()[name = string("op_1733_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1733_cast_fp16 = slice_by_index(begin = var_1733_begin_0, end = var_1733_end_0, end_mask = var_1733_end_mask_0, x = value_cache)[name = string("op_1733_cast_fp16")]; tensor var_1745_axes_0 = const()[name = string("op_1745_axes_0"), val = tensor([-1])]; tensor var_1745_cast_fp16 = squeeze(axes = var_1745_axes_0, x = hidden_7_cast_fp16)[name = string("op_1745_cast_fp16")]; tensor var_1747_axes_0 = const()[name = string("op_1747_axes_0"), val = tensor([-1])]; tensor var_1747_cast_fp16 = squeeze(axes = var_1747_axes_0, x = var_1745_cast_fp16)[name = string("op_1747_cast_fp16")]; tensor hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor([0])]; tensor hidden_states_49_cast_fp16 = expand_dims(axes = hidden_states_49_axes_0, x = var_1747_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; fp16 var_1753_promoted_to_fp16 = const()[name = string("op_1753_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1759_cast_fp16 = pow(x = hidden_states_49_cast_fp16, y = var_1753_promoted_to_fp16)[name = string("op_1759_cast_fp16")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_1759_cast_fp16)[name = string("variance_17_cast_fp16")]; fp16 var_1762_to_fp16 = const()[name = string("op_1762_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1763_cast_fp16 = add(x = variance_17_cast_fp16, y = var_1762_to_fp16)[name = string("op_1763_cast_fp16")]; fp32 var_1764_epsilon_0 = const()[name = string("op_1764_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1764_cast_fp16 = rsqrt(epsilon = var_1764_epsilon_0, x = var_1763_cast_fp16)[name = string("op_1764_cast_fp16")]; tensor hidden_states_53_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = var_1764_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488704)))]; tensor input_21_cast_fp16 = mul(x = const_21_to_fp16, y = hidden_states_53_cast_fp16)[name = string("input_21_cast_fp16")]; tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588032))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637248))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35686464))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_1781 = const()[name = string("op_1781"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_55_cast_fp16 = reshape(shape = var_1781, x = linear_14_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; tensor var_1787 = const()[name = string("op_1787"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_61_cast_fp16 = reshape(shape = var_1787, x = linear_15_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor var_1793 = const()[name = string("op_1793"), val = tensor([1, 1, 8, 128])]; tensor v_15_cast_fp16 = reshape(shape = var_1793, x = linear_16_cast_fp16)[name = string("v_15_cast_fp16")]; fp16 var_1798_promoted_to_fp16 = const()[name = string("op_1798_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1804_cast_fp16 = pow(x = hidden_states_55_cast_fp16, y = var_1798_promoted_to_fp16)[name = string("op_1804_cast_fp16")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1804_cast_fp16)[name = string("variance_19_cast_fp16")]; fp16 var_1807_to_fp16 = const()[name = string("op_1807_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1808_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1807_to_fp16)[name = string("op_1808_cast_fp16")]; fp32 var_1809_epsilon_0 = const()[name = string("op_1809_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1809_cast_fp16 = rsqrt(epsilon = var_1809_epsilon_0, x = var_1808_cast_fp16)[name = string("op_1809_cast_fp16")]; tensor hidden_states_59_cast_fp16 = mul(x = hidden_states_55_cast_fp16, y = var_1809_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor const_22_to_fp16 = const()[name = string("const_22_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687040)))]; tensor q_19_cast_fp16 = mul(x = const_22_to_fp16, y = hidden_states_59_cast_fp16)[name = string("q_19_cast_fp16")]; fp16 var_1816_promoted_to_fp16 = const()[name = string("op_1816_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1822_cast_fp16 = pow(x = hidden_states_61_cast_fp16, y = var_1816_promoted_to_fp16)[name = string("op_1822_cast_fp16")]; tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1822_cast_fp16)[name = string("variance_21_cast_fp16")]; fp16 var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1826_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1825_to_fp16)[name = string("op_1826_cast_fp16")]; fp32 var_1827_epsilon_0 = const()[name = string("op_1827_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1827_cast_fp16 = rsqrt(epsilon = var_1827_epsilon_0, x = var_1826_cast_fp16)[name = string("op_1827_cast_fp16")]; tensor hidden_states_65_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = var_1827_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor const_23_to_fp16 = const()[name = string("const_23_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687360)))]; tensor k_19_cast_fp16 = mul(x = const_23_to_fp16, y = hidden_states_65_cast_fp16)[name = string("k_19_cast_fp16")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_17_perm_0 = const()[name = string("v_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = q_19_cast_fp16)[name = string("transpose_103")]; tensor var_1844_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_3_cast_fp16)[name = string("op_1844_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_21_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_21_cast_fp16)[name = string("x2_9_cast_fp16")]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1865_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_1865_cast_fp16")]; int32 var_1867 = const()[name = string("op_1867"), val = int32(-1)]; bool var_1868_interleave_0 = const()[name = string("op_1868_interleave_0"), val = bool(false)]; tensor var_1868_cast_fp16 = concat(axis = var_1867, interleave = var_1868_interleave_0, values = (var_1865_cast_fp16, x1_9_cast_fp16))[name = string("op_1868_cast_fp16")]; tensor var_1869_cast_fp16 = mul(x = var_1868_cast_fp16, y = sin_3_cast_fp16)[name = string("op_1869_cast_fp16")]; tensor q_23_cast_fp16 = add(x = var_1844_cast_fp16, y = var_1869_cast_fp16)[name = string("q_23_cast_fp16")]; tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = string("transpose_102")]; tensor var_1872_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_3_cast_fp16)[name = string("op_1872_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_21_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_21_cast_fp16)[name = string("x2_11_cast_fp16")]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1893_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_1893_cast_fp16")]; int32 var_1895 = const()[name = string("op_1895"), val = int32(-1)]; bool var_1896_interleave_0 = const()[name = string("op_1896_interleave_0"), val = bool(false)]; tensor var_1896_cast_fp16 = concat(axis = var_1895, interleave = var_1896_interleave_0, values = (var_1893_cast_fp16, x1_11_cast_fp16))[name = string("op_1896_cast_fp16")]; tensor var_1897_cast_fp16 = mul(x = var_1896_cast_fp16, y = sin_3_cast_fp16)[name = string("op_1897_cast_fp16")]; tensor k_23_cast_fp16 = add(x = var_1872_cast_fp16, y = var_1897_cast_fp16)[name = string("k_23_cast_fp16")]; tensor var_1904 = const()[name = string("op_1904"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_5_cast_fp16 = reshape(shape = var_1904, x = k_23_cast_fp16)[name = string("nk_flat_5_cast_fp16")]; tensor var_1910 = const()[name = string("op_1910"), val = tensor([1, 1024, 1, 1])]; tensor v_17_cast_fp16 = transpose(perm = v_17_perm_0, x = v_15_cast_fp16)[name = string("transpose_101")]; tensor nv_flat_5_cast_fp16 = reshape(shape = var_1910, x = v_17_cast_fp16)[name = string("nv_flat_5_cast_fp16")]; tensor var_1919_cast_fp16 = mul(x = var_1713_cast_fp16, y = var_1194_cast_fp16)[name = string("op_1919_cast_fp16")]; tensor var_1920_cast_fp16 = mul(x = nk_flat_5_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_1920_cast_fp16")]; tensor key_cache_13_cast_fp16 = add(x = var_1919_cast_fp16, y = var_1920_cast_fp16)[name = string("key_cache_13_cast_fp16")]; tensor var_1926_cast_fp16 = mul(x = var_1733_cast_fp16, y = var_1194_cast_fp16)[name = string("op_1926_cast_fp16")]; tensor var_1927_cast_fp16 = mul(x = nv_flat_5_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_1927_cast_fp16")]; tensor value_cache_13_cast_fp16 = add(x = var_1926_cast_fp16, y = var_1927_cast_fp16)[name = string("value_cache_13_cast_fp16")]; tensor kc_13_axes_0 = const()[name = string("kc_13_axes_0"), val = tensor([2])]; tensor kc_13_cast_fp16 = squeeze(axes = kc_13_axes_0, x = key_cache_13_cast_fp16)[name = string("kc_13_cast_fp16")]; tensor var_1936 = const()[name = string("op_1936"), val = tensor([1, 8, 128, 256])]; tensor kc_15_cast_fp16 = reshape(shape = var_1936, x = kc_13_cast_fp16)[name = string("kc_15_cast_fp16")]; tensor vc_13_axes_0 = const()[name = string("vc_13_axes_0"), val = tensor([2])]; tensor vc_13_cast_fp16 = squeeze(axes = vc_13_axes_0, x = value_cache_13_cast_fp16)[name = string("vc_13_cast_fp16")]; tensor var_1944 = const()[name = string("op_1944"), val = tensor([1, 8, 128, 256])]; tensor vc_15_cast_fp16 = reshape(shape = var_1944, x = vc_13_cast_fp16)[name = string("vc_15_cast_fp16")]; tensor var_1947_axes_0 = const()[name = string("op_1947_axes_0"), val = tensor([2])]; tensor var_1947_cast_fp16 = expand_dims(axes = var_1947_axes_0, x = kc_15_cast_fp16)[name = string("op_1947_cast_fp16")]; tensor var_1955_reps_0 = const()[name = string("op_1955_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1955_cast_fp16 = tile(reps = var_1955_reps_0, x = var_1947_cast_fp16)[name = string("op_1955_cast_fp16")]; tensor var_1960 = const()[name = string("op_1960"), val = tensor([1, 16, 128, 256])]; tensor kc_17_cast_fp16 = reshape(shape = var_1960, x = var_1955_cast_fp16)[name = string("kc_17_cast_fp16")]; tensor var_1963_axes_0 = const()[name = string("op_1963_axes_0"), val = tensor([2])]; tensor var_1963_cast_fp16 = expand_dims(axes = var_1963_axes_0, x = vc_15_cast_fp16)[name = string("op_1963_cast_fp16")]; tensor var_1971_reps_0 = const()[name = string("op_1971_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1971_cast_fp16 = tile(reps = var_1971_reps_0, x = var_1963_cast_fp16)[name = string("op_1971_cast_fp16")]; tensor var_1976 = const()[name = string("op_1976"), val = tensor([1, 16, 128, 256])]; tensor vc_17_cast_fp16 = reshape(shape = var_1976, x = var_1971_cast_fp16)[name = string("vc_17_cast_fp16")]; bool var_1978_transpose_x_0 = const()[name = string("op_1978_transpose_x_0"), val = bool(false)]; bool var_1978_transpose_y_0 = const()[name = string("op_1978_transpose_y_0"), val = bool(false)]; tensor var_1978_cast_fp16 = matmul(transpose_x = var_1978_transpose_x_0, transpose_y = var_1978_transpose_y_0, x = q_23_cast_fp16, y = kc_17_cast_fp16)[name = string("op_1978_cast_fp16")]; fp16 _inversed_attn_weights_17_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_17_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_17_cast_fp16 = mul(x = var_1978_cast_fp16, y = _inversed_attn_weights_17_y_0_to_fp16)[name = string("_inversed_attn_weights_17_cast_fp16")]; tensor attn_weights_19_cast_fp16 = add(x = _inversed_attn_weights_17_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; int32 var_1992 = const()[name = string("op_1992"), val = int32(-1)]; tensor attn_weights_23_cast_fp16 = softmax(axis = var_1992, x = attn_weights_19_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_9_transpose_x_1 = const()[name = string("attn_output_9_transpose_x_1"), val = bool(false)]; bool attn_output_9_transpose_y_1 = const()[name = string("attn_output_9_transpose_y_1"), val = bool(true)]; tensor attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_1, transpose_y = attn_output_9_transpose_y_1, x = attn_weights_23_cast_fp16, y = vc_17_cast_fp16)[name = string("attn_output_9_cast_fp16")]; tensor var_2001_perm_0 = const()[name = string("op_2001_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2005 = const()[name = string("op_2005"), val = tensor([1, 1, -1])]; tensor var_2001_cast_fp16 = transpose(perm = var_2001_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_100")]; tensor input_23_cast_fp16 = reshape(shape = var_2005, x = var_2001_cast_fp16)[name = string("input_23_cast_fp16")]; tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37784896))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor var_2011_axes_0 = const()[name = string("op_2011_axes_0"), val = tensor([0])]; tensor var_2011_cast_fp16 = squeeze(axes = var_2011_axes_0, x = linear_17_cast_fp16)[name = string("op_2011_cast_fp16")]; tensor var_2013_axes_0 = const()[name = string("op_2013_axes_0"), val = tensor([0])]; tensor var_2013_cast_fp16 = squeeze(axes = var_2013_axes_0, x = var_2011_cast_fp16)[name = string("op_2013_cast_fp16")]; tensor var_2015_axes_0 = const()[name = string("op_2015_axes_0"), val = tensor([-1])]; tensor var_2015_cast_fp16 = expand_dims(axes = var_2015_axes_0, x = var_2013_cast_fp16)[name = string("op_2015_cast_fp16")]; tensor attn_4d_5_axes_0 = const()[name = string("attn_4d_5_axes_0"), val = tensor([-1])]; tensor attn_4d_5_cast_fp16 = expand_dims(axes = attn_4d_5_axes_0, x = var_2015_cast_fp16)[name = string("attn_4d_5_cast_fp16")]; tensor hidden_9_cast_fp16 = add(x = hidden_7_cast_fp16, y = attn_4d_5_cast_fp16)[name = string("hidden_9_cast_fp16")]; tensor var_2021_axes_0 = const()[name = string("op_2021_axes_0"), val = tensor([-1])]; tensor var_2021_cast_fp16 = squeeze(axes = var_2021_axes_0, x = hidden_9_cast_fp16)[name = string("op_2021_cast_fp16")]; tensor var_2023_axes_0 = const()[name = string("op_2023_axes_0"), val = tensor([-1])]; tensor var_2023_cast_fp16 = squeeze(axes = var_2023_axes_0, x = var_2021_cast_fp16)[name = string("op_2023_cast_fp16")]; tensor hidden_states_67_axes_0 = const()[name = string("hidden_states_67_axes_0"), val = tensor([0])]; tensor hidden_states_67_cast_fp16 = expand_dims(axes = hidden_states_67_axes_0, x = var_2023_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; fp16 var_2029_promoted_to_fp16 = const()[name = string("op_2029_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2035_cast_fp16 = pow(x = hidden_states_67_cast_fp16, y = var_2029_promoted_to_fp16)[name = string("op_2035_cast_fp16")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_2035_cast_fp16)[name = string("variance_23_cast_fp16")]; fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2039_cast_fp16 = add(x = variance_23_cast_fp16, y = var_2038_to_fp16)[name = string("op_2039_cast_fp16")]; fp32 var_2040_epsilon_0 = const()[name = string("op_2040_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2040_cast_fp16 = rsqrt(epsilon = var_2040_epsilon_0, x = var_2039_cast_fp16)[name = string("op_2040_cast_fp16")]; tensor hidden_states_71_cast_fp16 = mul(x = hidden_states_67_cast_fp16, y = var_2040_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37785472)))]; tensor input_25_cast_fp16 = mul(x = const_30_to_fp16, y = hidden_states_71_cast_fp16)[name = string("input_25_cast_fp16")]; tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37787584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933376))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_2050_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_2050_cast_fp16")]; tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44079744))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_29_cast_fp16 = mul(x = var_2050_cast_fp16, y = linear_19_cast_fp16)[name = string("input_29_cast_fp16")]; tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44080320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226112))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor var_2057_axes_0 = const()[name = string("op_2057_axes_0"), val = tensor([0])]; tensor var_2057_cast_fp16 = squeeze(axes = var_2057_axes_0, x = linear_20_cast_fp16)[name = string("op_2057_cast_fp16")]; tensor var_2059_axes_0 = const()[name = string("op_2059_axes_0"), val = tensor([0])]; tensor var_2059_cast_fp16 = squeeze(axes = var_2059_axes_0, x = var_2057_cast_fp16)[name = string("op_2059_cast_fp16")]; tensor var_2061_axes_0 = const()[name = string("op_2061_axes_0"), val = tensor([-1])]; tensor var_2061_cast_fp16 = expand_dims(axes = var_2061_axes_0, x = var_2059_cast_fp16)[name = string("op_2061_cast_fp16")]; tensor mlp_4d_5_axes_0 = const()[name = string("mlp_4d_5_axes_0"), val = tensor([-1])]; tensor mlp_4d_5_cast_fp16 = expand_dims(axes = mlp_4d_5_axes_0, x = var_2061_cast_fp16)[name = string("mlp_4d_5_cast_fp16")]; tensor hidden_11_cast_fp16 = add(x = hidden_9_cast_fp16, y = mlp_4d_5_cast_fp16)[name = string("hidden_11_cast_fp16")]; tensor var_2075_begin_0 = const()[name = string("op_2075_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_2075_end_0 = const()[name = string("op_2075_end_0"), val = tensor([1, 4096, 1, 256])]; tensor var_2075_end_mask_0 = const()[name = string("op_2075_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2075_cast_fp16 = slice_by_index(begin = var_2075_begin_0, end = var_2075_end_0, end_mask = var_2075_end_mask_0, x = key_cache)[name = string("op_2075_cast_fp16")]; tensor var_2095_begin_0 = const()[name = string("op_2095_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_2095_end_0 = const()[name = string("op_2095_end_0"), val = tensor([1, 4096, 1, 256])]; tensor var_2095_end_mask_0 = const()[name = string("op_2095_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2095_cast_fp16 = slice_by_index(begin = var_2095_begin_0, end = var_2095_end_0, end_mask = var_2095_end_mask_0, x = value_cache)[name = string("op_2095_cast_fp16")]; tensor var_2107_axes_0 = const()[name = string("op_2107_axes_0"), val = tensor([-1])]; tensor var_2107_cast_fp16 = squeeze(axes = var_2107_axes_0, x = hidden_11_cast_fp16)[name = string("op_2107_cast_fp16")]; tensor var_2109_axes_0 = const()[name = string("op_2109_axes_0"), val = tensor([-1])]; tensor var_2109_cast_fp16 = squeeze(axes = var_2109_axes_0, x = var_2107_cast_fp16)[name = string("op_2109_cast_fp16")]; tensor hidden_states_73_axes_0 = const()[name = string("hidden_states_73_axes_0"), val = tensor([0])]; tensor hidden_states_73_cast_fp16 = expand_dims(axes = hidden_states_73_axes_0, x = var_2109_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; fp16 var_2115_promoted_to_fp16 = const()[name = string("op_2115_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2121_cast_fp16 = pow(x = hidden_states_73_cast_fp16, y = var_2115_promoted_to_fp16)[name = string("op_2121_cast_fp16")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_2121_cast_fp16)[name = string("variance_25_cast_fp16")]; fp16 var_2124_to_fp16 = const()[name = string("op_2124_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2125_cast_fp16 = add(x = variance_25_cast_fp16, y = var_2124_to_fp16)[name = string("op_2125_cast_fp16")]; fp32 var_2126_epsilon_0 = const()[name = string("op_2126_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2126_cast_fp16 = rsqrt(epsilon = var_2126_epsilon_0, x = var_2125_cast_fp16)[name = string("op_2126_cast_fp16")]; tensor hidden_states_77_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = var_2126_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226688)))]; tensor input_31_cast_fp16 = mul(x = const_31_to_fp16, y = hidden_states_77_cast_fp16)[name = string("input_31_cast_fp16")]; tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326016))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375232))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51424448))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor var_2143 = const()[name = string("op_2143"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_79_cast_fp16 = reshape(shape = var_2143, x = linear_21_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; tensor var_2149 = const()[name = string("op_2149"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_85_cast_fp16 = reshape(shape = var_2149, x = linear_22_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor var_2155 = const()[name = string("op_2155"), val = tensor([1, 1, 8, 128])]; tensor v_21_cast_fp16 = reshape(shape = var_2155, x = linear_23_cast_fp16)[name = string("v_21_cast_fp16")]; fp16 var_2160_promoted_to_fp16 = const()[name = string("op_2160_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2166_cast_fp16 = pow(x = hidden_states_79_cast_fp16, y = var_2160_promoted_to_fp16)[name = string("op_2166_cast_fp16")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_2166_cast_fp16)[name = string("variance_27_cast_fp16")]; fp16 var_2169_to_fp16 = const()[name = string("op_2169_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2170_cast_fp16 = add(x = variance_27_cast_fp16, y = var_2169_to_fp16)[name = string("op_2170_cast_fp16")]; fp32 var_2171_epsilon_0 = const()[name = string("op_2171_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2171_cast_fp16 = rsqrt(epsilon = var_2171_epsilon_0, x = var_2170_cast_fp16)[name = string("op_2171_cast_fp16")]; tensor hidden_states_83_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = var_2171_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425024)))]; tensor q_27_cast_fp16 = mul(x = const_32_to_fp16, y = hidden_states_83_cast_fp16)[name = string("q_27_cast_fp16")]; fp16 var_2178_promoted_to_fp16 = const()[name = string("op_2178_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2184_cast_fp16 = pow(x = hidden_states_85_cast_fp16, y = var_2178_promoted_to_fp16)[name = string("op_2184_cast_fp16")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_2184_cast_fp16)[name = string("variance_29_cast_fp16")]; fp16 var_2187_to_fp16 = const()[name = string("op_2187_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2188_cast_fp16 = add(x = variance_29_cast_fp16, y = var_2187_to_fp16)[name = string("op_2188_cast_fp16")]; fp32 var_2189_epsilon_0 = const()[name = string("op_2189_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2189_cast_fp16 = rsqrt(epsilon = var_2189_epsilon_0, x = var_2188_cast_fp16)[name = string("op_2189_cast_fp16")]; tensor hidden_states_89_cast_fp16 = mul(x = hidden_states_85_cast_fp16, y = var_2189_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425344)))]; tensor k_27_cast_fp16 = mul(x = const_33_to_fp16, y = hidden_states_89_cast_fp16)[name = string("k_27_cast_fp16")]; tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_23_perm_0 = const()[name = string("v_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = q_27_cast_fp16)[name = string("transpose_99")]; tensor var_2206_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_3_cast_fp16)[name = string("op_2206_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_29_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_29_cast_fp16)[name = string("x2_13_cast_fp16")]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2227_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2227_cast_fp16")]; int32 var_2229 = const()[name = string("op_2229"), val = int32(-1)]; bool var_2230_interleave_0 = const()[name = string("op_2230_interleave_0"), val = bool(false)]; tensor var_2230_cast_fp16 = concat(axis = var_2229, interleave = var_2230_interleave_0, values = (var_2227_cast_fp16, x1_13_cast_fp16))[name = string("op_2230_cast_fp16")]; tensor var_2231_cast_fp16 = mul(x = var_2230_cast_fp16, y = sin_3_cast_fp16)[name = string("op_2231_cast_fp16")]; tensor q_31_cast_fp16 = add(x = var_2206_cast_fp16, y = var_2231_cast_fp16)[name = string("q_31_cast_fp16")]; tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = k_27_cast_fp16)[name = string("transpose_98")]; tensor var_2234_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_3_cast_fp16)[name = string("op_2234_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_29_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_29_cast_fp16)[name = string("x2_15_cast_fp16")]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2255_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_2255_cast_fp16")]; int32 var_2257 = const()[name = string("op_2257"), val = int32(-1)]; bool var_2258_interleave_0 = const()[name = string("op_2258_interleave_0"), val = bool(false)]; tensor var_2258_cast_fp16 = concat(axis = var_2257, interleave = var_2258_interleave_0, values = (var_2255_cast_fp16, x1_15_cast_fp16))[name = string("op_2258_cast_fp16")]; tensor var_2259_cast_fp16 = mul(x = var_2258_cast_fp16, y = sin_3_cast_fp16)[name = string("op_2259_cast_fp16")]; tensor k_31_cast_fp16 = add(x = var_2234_cast_fp16, y = var_2259_cast_fp16)[name = string("k_31_cast_fp16")]; tensor var_2266 = const()[name = string("op_2266"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_7_cast_fp16 = reshape(shape = var_2266, x = k_31_cast_fp16)[name = string("nk_flat_7_cast_fp16")]; tensor var_2272 = const()[name = string("op_2272"), val = tensor([1, 1024, 1, 1])]; tensor v_23_cast_fp16 = transpose(perm = v_23_perm_0, x = v_21_cast_fp16)[name = string("transpose_97")]; tensor nv_flat_7_cast_fp16 = reshape(shape = var_2272, x = v_23_cast_fp16)[name = string("nv_flat_7_cast_fp16")]; tensor var_2281_cast_fp16 = mul(x = var_2075_cast_fp16, y = var_1194_cast_fp16)[name = string("op_2281_cast_fp16")]; tensor var_2282_cast_fp16 = mul(x = nk_flat_7_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_2282_cast_fp16")]; tensor key_cache_17_cast_fp16 = add(x = var_2281_cast_fp16, y = var_2282_cast_fp16)[name = string("key_cache_17_cast_fp16")]; tensor var_2288_cast_fp16 = mul(x = var_2095_cast_fp16, y = var_1194_cast_fp16)[name = string("op_2288_cast_fp16")]; tensor var_2289_cast_fp16 = mul(x = nv_flat_7_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_2289_cast_fp16")]; tensor value_cache_17_cast_fp16 = add(x = var_2288_cast_fp16, y = var_2289_cast_fp16)[name = string("value_cache_17_cast_fp16")]; tensor kc_19_axes_0 = const()[name = string("kc_19_axes_0"), val = tensor([2])]; tensor kc_19_cast_fp16 = squeeze(axes = kc_19_axes_0, x = key_cache_17_cast_fp16)[name = string("kc_19_cast_fp16")]; tensor var_2298 = const()[name = string("op_2298"), val = tensor([1, 8, 128, 256])]; tensor kc_21_cast_fp16 = reshape(shape = var_2298, x = kc_19_cast_fp16)[name = string("kc_21_cast_fp16")]; tensor vc_19_axes_0 = const()[name = string("vc_19_axes_0"), val = tensor([2])]; tensor vc_19_cast_fp16 = squeeze(axes = vc_19_axes_0, x = value_cache_17_cast_fp16)[name = string("vc_19_cast_fp16")]; tensor var_2306 = const()[name = string("op_2306"), val = tensor([1, 8, 128, 256])]; tensor vc_21_cast_fp16 = reshape(shape = var_2306, x = vc_19_cast_fp16)[name = string("vc_21_cast_fp16")]; tensor var_2309_axes_0 = const()[name = string("op_2309_axes_0"), val = tensor([2])]; tensor var_2309_cast_fp16 = expand_dims(axes = var_2309_axes_0, x = kc_21_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor var_2317_reps_0 = const()[name = string("op_2317_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2317_cast_fp16 = tile(reps = var_2317_reps_0, x = var_2309_cast_fp16)[name = string("op_2317_cast_fp16")]; tensor var_2322 = const()[name = string("op_2322"), val = tensor([1, 16, 128, 256])]; tensor kc_23_cast_fp16 = reshape(shape = var_2322, x = var_2317_cast_fp16)[name = string("kc_23_cast_fp16")]; tensor var_2325_axes_0 = const()[name = string("op_2325_axes_0"), val = tensor([2])]; tensor var_2325_cast_fp16 = expand_dims(axes = var_2325_axes_0, x = vc_21_cast_fp16)[name = string("op_2325_cast_fp16")]; tensor var_2333_reps_0 = const()[name = string("op_2333_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2333_cast_fp16 = tile(reps = var_2333_reps_0, x = var_2325_cast_fp16)[name = string("op_2333_cast_fp16")]; tensor var_2338 = const()[name = string("op_2338"), val = tensor([1, 16, 128, 256])]; tensor vc_23_cast_fp16 = reshape(shape = var_2338, x = var_2333_cast_fp16)[name = string("vc_23_cast_fp16")]; bool var_2340_transpose_x_0 = const()[name = string("op_2340_transpose_x_0"), val = bool(false)]; bool var_2340_transpose_y_0 = const()[name = string("op_2340_transpose_y_0"), val = bool(false)]; tensor var_2340_cast_fp16 = matmul(transpose_x = var_2340_transpose_x_0, transpose_y = var_2340_transpose_y_0, x = q_31_cast_fp16, y = kc_23_cast_fp16)[name = string("op_2340_cast_fp16")]; fp16 _inversed_attn_weights_25_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_25_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_25_cast_fp16 = mul(x = var_2340_cast_fp16, y = _inversed_attn_weights_25_y_0_to_fp16)[name = string("_inversed_attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = _inversed_attn_weights_25_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; int32 var_2354 = const()[name = string("op_2354"), val = int32(-1)]; tensor attn_weights_31_cast_fp16 = softmax(axis = var_2354, x = attn_weights_27_cast_fp16)[name = string("attn_weights_31_cast_fp16")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(false)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(true)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_31_cast_fp16, y = vc_23_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_2363_perm_0 = const()[name = string("op_2363_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2367 = const()[name = string("op_2367"), val = tensor([1, 1, -1])]; tensor var_2363_cast_fp16 = transpose(perm = var_2363_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_96")]; tensor input_33_cast_fp16 = reshape(shape = var_2367, x = var_2363_cast_fp16)[name = string("input_33_cast_fp16")]; tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53522880))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_2373_axes_0 = const()[name = string("op_2373_axes_0"), val = tensor([0])]; tensor var_2373_cast_fp16 = squeeze(axes = var_2373_axes_0, x = linear_24_cast_fp16)[name = string("op_2373_cast_fp16")]; tensor var_2375_axes_0 = const()[name = string("op_2375_axes_0"), val = tensor([0])]; tensor var_2375_cast_fp16 = squeeze(axes = var_2375_axes_0, x = var_2373_cast_fp16)[name = string("op_2375_cast_fp16")]; tensor var_2377_axes_0 = const()[name = string("op_2377_axes_0"), val = tensor([-1])]; tensor var_2377_cast_fp16 = expand_dims(axes = var_2377_axes_0, x = var_2375_cast_fp16)[name = string("op_2377_cast_fp16")]; tensor attn_4d_7_axes_0 = const()[name = string("attn_4d_7_axes_0"), val = tensor([-1])]; tensor attn_4d_7_cast_fp16 = expand_dims(axes = attn_4d_7_axes_0, x = var_2377_cast_fp16)[name = string("attn_4d_7_cast_fp16")]; tensor hidden_13_cast_fp16 = add(x = hidden_11_cast_fp16, y = attn_4d_7_cast_fp16)[name = string("hidden_13_cast_fp16")]; tensor var_2383_axes_0 = const()[name = string("op_2383_axes_0"), val = tensor([-1])]; tensor var_2383_cast_fp16 = squeeze(axes = var_2383_axes_0, x = hidden_13_cast_fp16)[name = string("op_2383_cast_fp16")]; tensor var_2385_axes_0 = const()[name = string("op_2385_axes_0"), val = tensor([-1])]; tensor var_2385_cast_fp16 = squeeze(axes = var_2385_axes_0, x = var_2383_cast_fp16)[name = string("op_2385_cast_fp16")]; tensor hidden_states_91_axes_0 = const()[name = string("hidden_states_91_axes_0"), val = tensor([0])]; tensor hidden_states_91_cast_fp16 = expand_dims(axes = hidden_states_91_axes_0, x = var_2385_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; fp16 var_2391_promoted_to_fp16 = const()[name = string("op_2391_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2397_cast_fp16 = pow(x = hidden_states_91_cast_fp16, y = var_2391_promoted_to_fp16)[name = string("op_2397_cast_fp16")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_2397_cast_fp16)[name = string("variance_31_cast_fp16")]; fp16 var_2400_to_fp16 = const()[name = string("op_2400_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2401_cast_fp16 = add(x = variance_31_cast_fp16, y = var_2400_to_fp16)[name = string("op_2401_cast_fp16")]; fp32 var_2402_epsilon_0 = const()[name = string("op_2402_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2402_cast_fp16 = rsqrt(epsilon = var_2402_epsilon_0, x = var_2401_cast_fp16)[name = string("op_2402_cast_fp16")]; tensor hidden_states_95_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = var_2402_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53523456)))]; tensor input_35_cast_fp16 = mul(x = const_40_to_fp16, y = hidden_states_95_cast_fp16)[name = string("input_35_cast_fp16")]; tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53525568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671360))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_2412_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_2412_cast_fp16")]; tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59817728))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_39_cast_fp16 = mul(x = var_2412_cast_fp16, y = linear_26_cast_fp16)[name = string("input_39_cast_fp16")]; tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59818304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964096))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor var_2419_axes_0 = const()[name = string("op_2419_axes_0"), val = tensor([0])]; tensor var_2419_cast_fp16 = squeeze(axes = var_2419_axes_0, x = linear_27_cast_fp16)[name = string("op_2419_cast_fp16")]; tensor var_2421_axes_0 = const()[name = string("op_2421_axes_0"), val = tensor([0])]; tensor var_2421_cast_fp16 = squeeze(axes = var_2421_axes_0, x = var_2419_cast_fp16)[name = string("op_2421_cast_fp16")]; tensor var_2423_axes_0 = const()[name = string("op_2423_axes_0"), val = tensor([-1])]; tensor var_2423_cast_fp16 = expand_dims(axes = var_2423_axes_0, x = var_2421_cast_fp16)[name = string("op_2423_cast_fp16")]; tensor mlp_4d_7_axes_0 = const()[name = string("mlp_4d_7_axes_0"), val = tensor([-1])]; tensor mlp_4d_7_cast_fp16 = expand_dims(axes = mlp_4d_7_axes_0, x = var_2423_cast_fp16)[name = string("mlp_4d_7_cast_fp16")]; tensor hidden_15_cast_fp16 = add(x = hidden_13_cast_fp16, y = mlp_4d_7_cast_fp16)[name = string("hidden_15_cast_fp16")]; tensor var_2437_begin_0 = const()[name = string("op_2437_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_2437_end_0 = const()[name = string("op_2437_end_0"), val = tensor([1, 5120, 1, 256])]; tensor var_2437_end_mask_0 = const()[name = string("op_2437_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2437_cast_fp16 = slice_by_index(begin = var_2437_begin_0, end = var_2437_end_0, end_mask = var_2437_end_mask_0, x = key_cache)[name = string("op_2437_cast_fp16")]; tensor var_2457_begin_0 = const()[name = string("op_2457_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_2457_end_0 = const()[name = string("op_2457_end_0"), val = tensor([1, 5120, 1, 256])]; tensor var_2457_end_mask_0 = const()[name = string("op_2457_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2457_cast_fp16 = slice_by_index(begin = var_2457_begin_0, end = var_2457_end_0, end_mask = var_2457_end_mask_0, x = value_cache)[name = string("op_2457_cast_fp16")]; tensor var_2469_axes_0 = const()[name = string("op_2469_axes_0"), val = tensor([-1])]; tensor var_2469_cast_fp16 = squeeze(axes = var_2469_axes_0, x = hidden_15_cast_fp16)[name = string("op_2469_cast_fp16")]; tensor var_2471_axes_0 = const()[name = string("op_2471_axes_0"), val = tensor([-1])]; tensor var_2471_cast_fp16 = squeeze(axes = var_2471_axes_0, x = var_2469_cast_fp16)[name = string("op_2471_cast_fp16")]; tensor hidden_states_97_axes_0 = const()[name = string("hidden_states_97_axes_0"), val = tensor([0])]; tensor hidden_states_97_cast_fp16 = expand_dims(axes = hidden_states_97_axes_0, x = var_2471_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; fp16 var_2477_promoted_to_fp16 = const()[name = string("op_2477_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2483_cast_fp16 = pow(x = hidden_states_97_cast_fp16, y = var_2477_promoted_to_fp16)[name = string("op_2483_cast_fp16")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_2483_cast_fp16)[name = string("variance_33_cast_fp16")]; fp16 var_2486_to_fp16 = const()[name = string("op_2486_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2487_cast_fp16 = add(x = variance_33_cast_fp16, y = var_2486_to_fp16)[name = string("op_2487_cast_fp16")]; fp32 var_2488_epsilon_0 = const()[name = string("op_2488_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2488_cast_fp16 = rsqrt(epsilon = var_2488_epsilon_0, x = var_2487_cast_fp16)[name = string("op_2488_cast_fp16")]; tensor hidden_states_101_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = var_2488_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964672)))]; tensor input_41_cast_fp16 = mul(x = const_41_to_fp16, y = hidden_states_101_cast_fp16)[name = string("input_41_cast_fp16")]; tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064000))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113216))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67162432))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor var_2505 = const()[name = string("op_2505"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_103_cast_fp16 = reshape(shape = var_2505, x = linear_28_cast_fp16)[name = string("hidden_states_103_cast_fp16")]; tensor var_2511 = const()[name = string("op_2511"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_109_cast_fp16 = reshape(shape = var_2511, x = linear_29_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; tensor var_2517 = const()[name = string("op_2517"), val = tensor([1, 1, 8, 128])]; tensor v_27_cast_fp16 = reshape(shape = var_2517, x = linear_30_cast_fp16)[name = string("v_27_cast_fp16")]; fp16 var_2522_promoted_to_fp16 = const()[name = string("op_2522_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2528_cast_fp16 = pow(x = hidden_states_103_cast_fp16, y = var_2522_promoted_to_fp16)[name = string("op_2528_cast_fp16")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_2528_cast_fp16)[name = string("variance_35_cast_fp16")]; fp16 var_2531_to_fp16 = const()[name = string("op_2531_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2532_cast_fp16 = add(x = variance_35_cast_fp16, y = var_2531_to_fp16)[name = string("op_2532_cast_fp16")]; fp32 var_2533_epsilon_0 = const()[name = string("op_2533_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2533_cast_fp16 = rsqrt(epsilon = var_2533_epsilon_0, x = var_2532_cast_fp16)[name = string("op_2533_cast_fp16")]; tensor hidden_states_107_cast_fp16 = mul(x = hidden_states_103_cast_fp16, y = var_2533_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163008)))]; tensor q_35_cast_fp16 = mul(x = const_42_to_fp16, y = hidden_states_107_cast_fp16)[name = string("q_35_cast_fp16")]; fp16 var_2540_promoted_to_fp16 = const()[name = string("op_2540_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2546_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_2540_promoted_to_fp16)[name = string("op_2546_cast_fp16")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_2546_cast_fp16)[name = string("variance_37_cast_fp16")]; fp16 var_2549_to_fp16 = const()[name = string("op_2549_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2550_cast_fp16 = add(x = variance_37_cast_fp16, y = var_2549_to_fp16)[name = string("op_2550_cast_fp16")]; fp32 var_2551_epsilon_0 = const()[name = string("op_2551_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2551_cast_fp16 = rsqrt(epsilon = var_2551_epsilon_0, x = var_2550_cast_fp16)[name = string("op_2551_cast_fp16")]; tensor hidden_states_113_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = var_2551_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163328)))]; tensor k_35_cast_fp16 = mul(x = const_43_to_fp16, y = hidden_states_113_cast_fp16)[name = string("k_35_cast_fp16")]; tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_29_perm_0 = const()[name = string("v_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = q_35_cast_fp16)[name = string("transpose_95")]; tensor var_2568_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_3_cast_fp16)[name = string("op_2568_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_37_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_37_cast_fp16)[name = string("x2_17_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2589_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2589_cast_fp16")]; int32 var_2591 = const()[name = string("op_2591"), val = int32(-1)]; bool var_2592_interleave_0 = const()[name = string("op_2592_interleave_0"), val = bool(false)]; tensor var_2592_cast_fp16 = concat(axis = var_2591, interleave = var_2592_interleave_0, values = (var_2589_cast_fp16, x1_17_cast_fp16))[name = string("op_2592_cast_fp16")]; tensor var_2593_cast_fp16 = mul(x = var_2592_cast_fp16, y = sin_3_cast_fp16)[name = string("op_2593_cast_fp16")]; tensor q_39_cast_fp16 = add(x = var_2568_cast_fp16, y = var_2593_cast_fp16)[name = string("q_39_cast_fp16")]; tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = k_35_cast_fp16)[name = string("transpose_94")]; tensor var_2596_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_3_cast_fp16)[name = string("op_2596_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_37_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_37_cast_fp16)[name = string("x2_19_cast_fp16")]; fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2617_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_2617_cast_fp16")]; int32 var_2619 = const()[name = string("op_2619"), val = int32(-1)]; bool var_2620_interleave_0 = const()[name = string("op_2620_interleave_0"), val = bool(false)]; tensor var_2620_cast_fp16 = concat(axis = var_2619, interleave = var_2620_interleave_0, values = (var_2617_cast_fp16, x1_19_cast_fp16))[name = string("op_2620_cast_fp16")]; tensor var_2621_cast_fp16 = mul(x = var_2620_cast_fp16, y = sin_3_cast_fp16)[name = string("op_2621_cast_fp16")]; tensor k_39_cast_fp16 = add(x = var_2596_cast_fp16, y = var_2621_cast_fp16)[name = string("k_39_cast_fp16")]; tensor var_2628 = const()[name = string("op_2628"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_9_cast_fp16 = reshape(shape = var_2628, x = k_39_cast_fp16)[name = string("nk_flat_9_cast_fp16")]; tensor var_2634 = const()[name = string("op_2634"), val = tensor([1, 1024, 1, 1])]; tensor v_29_cast_fp16 = transpose(perm = v_29_perm_0, x = v_27_cast_fp16)[name = string("transpose_93")]; tensor nv_flat_9_cast_fp16 = reshape(shape = var_2634, x = v_29_cast_fp16)[name = string("nv_flat_9_cast_fp16")]; tensor var_2643_cast_fp16 = mul(x = var_2437_cast_fp16, y = var_1194_cast_fp16)[name = string("op_2643_cast_fp16")]; tensor var_2644_cast_fp16 = mul(x = nk_flat_9_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_2644_cast_fp16")]; tensor key_cache_21_cast_fp16 = add(x = var_2643_cast_fp16, y = var_2644_cast_fp16)[name = string("key_cache_21_cast_fp16")]; tensor var_2650_cast_fp16 = mul(x = var_2457_cast_fp16, y = var_1194_cast_fp16)[name = string("op_2650_cast_fp16")]; tensor var_2651_cast_fp16 = mul(x = nv_flat_9_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_2651_cast_fp16")]; tensor value_cache_21_cast_fp16 = add(x = var_2650_cast_fp16, y = var_2651_cast_fp16)[name = string("value_cache_21_cast_fp16")]; tensor kc_25_axes_0 = const()[name = string("kc_25_axes_0"), val = tensor([2])]; tensor kc_25_cast_fp16 = squeeze(axes = kc_25_axes_0, x = key_cache_21_cast_fp16)[name = string("kc_25_cast_fp16")]; tensor var_2660 = const()[name = string("op_2660"), val = tensor([1, 8, 128, 256])]; tensor kc_27_cast_fp16 = reshape(shape = var_2660, x = kc_25_cast_fp16)[name = string("kc_27_cast_fp16")]; tensor vc_25_axes_0 = const()[name = string("vc_25_axes_0"), val = tensor([2])]; tensor vc_25_cast_fp16 = squeeze(axes = vc_25_axes_0, x = value_cache_21_cast_fp16)[name = string("vc_25_cast_fp16")]; tensor var_2668 = const()[name = string("op_2668"), val = tensor([1, 8, 128, 256])]; tensor vc_27_cast_fp16 = reshape(shape = var_2668, x = vc_25_cast_fp16)[name = string("vc_27_cast_fp16")]; tensor var_2671_axes_0 = const()[name = string("op_2671_axes_0"), val = tensor([2])]; tensor var_2671_cast_fp16 = expand_dims(axes = var_2671_axes_0, x = kc_27_cast_fp16)[name = string("op_2671_cast_fp16")]; tensor var_2679_reps_0 = const()[name = string("op_2679_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2679_cast_fp16 = tile(reps = var_2679_reps_0, x = var_2671_cast_fp16)[name = string("op_2679_cast_fp16")]; tensor var_2684 = const()[name = string("op_2684"), val = tensor([1, 16, 128, 256])]; tensor kc_29_cast_fp16 = reshape(shape = var_2684, x = var_2679_cast_fp16)[name = string("kc_29_cast_fp16")]; tensor var_2687_axes_0 = const()[name = string("op_2687_axes_0"), val = tensor([2])]; tensor var_2687_cast_fp16 = expand_dims(axes = var_2687_axes_0, x = vc_27_cast_fp16)[name = string("op_2687_cast_fp16")]; tensor var_2695_reps_0 = const()[name = string("op_2695_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2695_cast_fp16 = tile(reps = var_2695_reps_0, x = var_2687_cast_fp16)[name = string("op_2695_cast_fp16")]; tensor var_2700 = const()[name = string("op_2700"), val = tensor([1, 16, 128, 256])]; tensor vc_29_cast_fp16 = reshape(shape = var_2700, x = var_2695_cast_fp16)[name = string("vc_29_cast_fp16")]; bool var_2702_transpose_x_0 = const()[name = string("op_2702_transpose_x_0"), val = bool(false)]; bool var_2702_transpose_y_0 = const()[name = string("op_2702_transpose_y_0"), val = bool(false)]; tensor var_2702_cast_fp16 = matmul(transpose_x = var_2702_transpose_x_0, transpose_y = var_2702_transpose_y_0, x = q_39_cast_fp16, y = kc_29_cast_fp16)[name = string("op_2702_cast_fp16")]; fp16 _inversed_attn_weights_33_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_33_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_33_cast_fp16 = mul(x = var_2702_cast_fp16, y = _inversed_attn_weights_33_y_0_to_fp16)[name = string("_inversed_attn_weights_33_cast_fp16")]; tensor attn_weights_35_cast_fp16 = add(x = _inversed_attn_weights_33_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; int32 var_2716 = const()[name = string("op_2716"), val = int32(-1)]; tensor attn_weights_39_cast_fp16 = softmax(axis = var_2716, x = attn_weights_35_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; bool attn_output_17_transpose_x_1 = const()[name = string("attn_output_17_transpose_x_1"), val = bool(false)]; bool attn_output_17_transpose_y_1 = const()[name = string("attn_output_17_transpose_y_1"), val = bool(true)]; tensor attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_1, transpose_y = attn_output_17_transpose_y_1, x = attn_weights_39_cast_fp16, y = vc_29_cast_fp16)[name = string("attn_output_17_cast_fp16")]; tensor var_2725_perm_0 = const()[name = string("op_2725_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2729 = const()[name = string("op_2729"), val = tensor([1, 1, -1])]; tensor var_2725_cast_fp16 = transpose(perm = var_2725_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_92")]; tensor input_43_cast_fp16 = reshape(shape = var_2729, x = var_2725_cast_fp16)[name = string("input_43_cast_fp16")]; tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69260864))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor var_2735_axes_0 = const()[name = string("op_2735_axes_0"), val = tensor([0])]; tensor var_2735_cast_fp16 = squeeze(axes = var_2735_axes_0, x = linear_31_cast_fp16)[name = string("op_2735_cast_fp16")]; tensor var_2737_axes_0 = const()[name = string("op_2737_axes_0"), val = tensor([0])]; tensor var_2737_cast_fp16 = squeeze(axes = var_2737_axes_0, x = var_2735_cast_fp16)[name = string("op_2737_cast_fp16")]; tensor var_2739_axes_0 = const()[name = string("op_2739_axes_0"), val = tensor([-1])]; tensor var_2739_cast_fp16 = expand_dims(axes = var_2739_axes_0, x = var_2737_cast_fp16)[name = string("op_2739_cast_fp16")]; tensor attn_4d_9_axes_0 = const()[name = string("attn_4d_9_axes_0"), val = tensor([-1])]; tensor attn_4d_9_cast_fp16 = expand_dims(axes = attn_4d_9_axes_0, x = var_2739_cast_fp16)[name = string("attn_4d_9_cast_fp16")]; tensor hidden_17_cast_fp16 = add(x = hidden_15_cast_fp16, y = attn_4d_9_cast_fp16)[name = string("hidden_17_cast_fp16")]; tensor var_2745_axes_0 = const()[name = string("op_2745_axes_0"), val = tensor([-1])]; tensor var_2745_cast_fp16 = squeeze(axes = var_2745_axes_0, x = hidden_17_cast_fp16)[name = string("op_2745_cast_fp16")]; tensor var_2747_axes_0 = const()[name = string("op_2747_axes_0"), val = tensor([-1])]; tensor var_2747_cast_fp16 = squeeze(axes = var_2747_axes_0, x = var_2745_cast_fp16)[name = string("op_2747_cast_fp16")]; tensor hidden_states_115_axes_0 = const()[name = string("hidden_states_115_axes_0"), val = tensor([0])]; tensor hidden_states_115_cast_fp16 = expand_dims(axes = hidden_states_115_axes_0, x = var_2747_cast_fp16)[name = string("hidden_states_115_cast_fp16")]; fp16 var_2753_promoted_to_fp16 = const()[name = string("op_2753_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2759_cast_fp16 = pow(x = hidden_states_115_cast_fp16, y = var_2753_promoted_to_fp16)[name = string("op_2759_cast_fp16")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2759_cast_fp16)[name = string("variance_39_cast_fp16")]; fp16 var_2762_to_fp16 = const()[name = string("op_2762_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2763_cast_fp16 = add(x = variance_39_cast_fp16, y = var_2762_to_fp16)[name = string("op_2763_cast_fp16")]; fp32 var_2764_epsilon_0 = const()[name = string("op_2764_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2764_cast_fp16 = rsqrt(epsilon = var_2764_epsilon_0, x = var_2763_cast_fp16)[name = string("op_2764_cast_fp16")]; tensor hidden_states_119_cast_fp16 = mul(x = hidden_states_115_cast_fp16, y = var_2764_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69261440)))]; tensor input_45_cast_fp16 = mul(x = const_50_to_fp16, y = hidden_states_119_cast_fp16)[name = string("input_45_cast_fp16")]; tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69263552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409344))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_2774_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_2774_cast_fp16")]; tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75555712))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_49_cast_fp16 = mul(x = var_2774_cast_fp16, y = linear_33_cast_fp16)[name = string("input_49_cast_fp16")]; tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702080))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_2781_axes_0 = const()[name = string("op_2781_axes_0"), val = tensor([0])]; tensor var_2781_cast_fp16 = squeeze(axes = var_2781_axes_0, x = linear_34_cast_fp16)[name = string("op_2781_cast_fp16")]; tensor var_2783_axes_0 = const()[name = string("op_2783_axes_0"), val = tensor([0])]; tensor var_2783_cast_fp16 = squeeze(axes = var_2783_axes_0, x = var_2781_cast_fp16)[name = string("op_2783_cast_fp16")]; tensor var_2785_axes_0 = const()[name = string("op_2785_axes_0"), val = tensor([-1])]; tensor var_2785_cast_fp16 = expand_dims(axes = var_2785_axes_0, x = var_2783_cast_fp16)[name = string("op_2785_cast_fp16")]; tensor mlp_4d_9_axes_0 = const()[name = string("mlp_4d_9_axes_0"), val = tensor([-1])]; tensor mlp_4d_9_cast_fp16 = expand_dims(axes = mlp_4d_9_axes_0, x = var_2785_cast_fp16)[name = string("mlp_4d_9_cast_fp16")]; tensor hidden_19_cast_fp16 = add(x = hidden_17_cast_fp16, y = mlp_4d_9_cast_fp16)[name = string("hidden_19_cast_fp16")]; tensor var_2799_begin_0 = const()[name = string("op_2799_begin_0"), val = tensor([0, 5120, 0, 0])]; tensor var_2799_end_0 = const()[name = string("op_2799_end_0"), val = tensor([1, 6144, 1, 256])]; tensor var_2799_end_mask_0 = const()[name = string("op_2799_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = var_2799_end_0, end_mask = var_2799_end_mask_0, x = key_cache)[name = string("op_2799_cast_fp16")]; tensor var_2819_begin_0 = const()[name = string("op_2819_begin_0"), val = tensor([0, 5120, 0, 0])]; tensor var_2819_end_0 = const()[name = string("op_2819_end_0"), val = tensor([1, 6144, 1, 256])]; tensor var_2819_end_mask_0 = const()[name = string("op_2819_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2819_cast_fp16 = slice_by_index(begin = var_2819_begin_0, end = var_2819_end_0, end_mask = var_2819_end_mask_0, x = value_cache)[name = string("op_2819_cast_fp16")]; tensor var_2831_axes_0 = const()[name = string("op_2831_axes_0"), val = tensor([-1])]; tensor var_2831_cast_fp16 = squeeze(axes = var_2831_axes_0, x = hidden_19_cast_fp16)[name = string("op_2831_cast_fp16")]; tensor var_2833_axes_0 = const()[name = string("op_2833_axes_0"), val = tensor([-1])]; tensor var_2833_cast_fp16 = squeeze(axes = var_2833_axes_0, x = var_2831_cast_fp16)[name = string("op_2833_cast_fp16")]; tensor hidden_states_121_axes_0 = const()[name = string("hidden_states_121_axes_0"), val = tensor([0])]; tensor hidden_states_121_cast_fp16 = expand_dims(axes = hidden_states_121_axes_0, x = var_2833_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; fp16 var_2839_promoted_to_fp16 = const()[name = string("op_2839_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2845_cast_fp16 = pow(x = hidden_states_121_cast_fp16, y = var_2839_promoted_to_fp16)[name = string("op_2845_cast_fp16")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_2845_cast_fp16)[name = string("variance_41_cast_fp16")]; fp16 var_2848_to_fp16 = const()[name = string("op_2848_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2849_cast_fp16 = add(x = variance_41_cast_fp16, y = var_2848_to_fp16)[name = string("op_2849_cast_fp16")]; fp32 var_2850_epsilon_0 = const()[name = string("op_2850_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2850_cast_fp16 = rsqrt(epsilon = var_2850_epsilon_0, x = var_2849_cast_fp16)[name = string("op_2850_cast_fp16")]; tensor hidden_states_125_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = var_2850_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702656)))]; tensor input_51_cast_fp16 = mul(x = const_51_to_fp16, y = hidden_states_125_cast_fp16)[name = string("input_51_cast_fp16")]; tensor layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80801984))))[name = string("layers_5_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80802560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851200))))[name = string("layers_5_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900416))))[name = string("layers_5_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor var_2867 = const()[name = string("op_2867"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_127_cast_fp16 = reshape(shape = var_2867, x = linear_35_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; tensor var_2873 = const()[name = string("op_2873"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_133_cast_fp16 = reshape(shape = var_2873, x = linear_36_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; tensor var_2879 = const()[name = string("op_2879"), val = tensor([1, 1, 8, 128])]; tensor v_33_cast_fp16 = reshape(shape = var_2879, x = linear_37_cast_fp16)[name = string("v_33_cast_fp16")]; fp16 var_2884_promoted_to_fp16 = const()[name = string("op_2884_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2890_cast_fp16 = pow(x = hidden_states_127_cast_fp16, y = var_2884_promoted_to_fp16)[name = string("op_2890_cast_fp16")]; tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2890_cast_fp16)[name = string("variance_43_cast_fp16")]; fp16 var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2894_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2893_to_fp16)[name = string("op_2894_cast_fp16")]; fp32 var_2895_epsilon_0 = const()[name = string("op_2895_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2895_cast_fp16 = rsqrt(epsilon = var_2895_epsilon_0, x = var_2894_cast_fp16)[name = string("op_2895_cast_fp16")]; tensor hidden_states_131_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = var_2895_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900992)))]; tensor q_43_cast_fp16 = mul(x = const_52_to_fp16, y = hidden_states_131_cast_fp16)[name = string("q_43_cast_fp16")]; fp16 var_2902_promoted_to_fp16 = const()[name = string("op_2902_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2908_cast_fp16 = pow(x = hidden_states_133_cast_fp16, y = var_2902_promoted_to_fp16)[name = string("op_2908_cast_fp16")]; tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2908_cast_fp16)[name = string("variance_45_cast_fp16")]; fp16 var_2911_to_fp16 = const()[name = string("op_2911_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2912_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2911_to_fp16)[name = string("op_2912_cast_fp16")]; fp32 var_2913_epsilon_0 = const()[name = string("op_2913_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2913_cast_fp16 = rsqrt(epsilon = var_2913_epsilon_0, x = var_2912_cast_fp16)[name = string("op_2913_cast_fp16")]; tensor hidden_states_137_cast_fp16 = mul(x = hidden_states_133_cast_fp16, y = var_2913_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901312)))]; tensor k_43_cast_fp16 = mul(x = const_53_to_fp16, y = hidden_states_137_cast_fp16)[name = string("k_43_cast_fp16")]; tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_35_perm_0 = const()[name = string("v_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = q_43_cast_fp16)[name = string("transpose_91")]; tensor var_2930_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_3_cast_fp16)[name = string("op_2930_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_45_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_45_cast_fp16)[name = string("x2_21_cast_fp16")]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2951_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_2951_cast_fp16")]; int32 var_2953 = const()[name = string("op_2953"), val = int32(-1)]; bool var_2954_interleave_0 = const()[name = string("op_2954_interleave_0"), val = bool(false)]; tensor var_2954_cast_fp16 = concat(axis = var_2953, interleave = var_2954_interleave_0, values = (var_2951_cast_fp16, x1_21_cast_fp16))[name = string("op_2954_cast_fp16")]; tensor var_2955_cast_fp16 = mul(x = var_2954_cast_fp16, y = sin_3_cast_fp16)[name = string("op_2955_cast_fp16")]; tensor q_47_cast_fp16 = add(x = var_2930_cast_fp16, y = var_2955_cast_fp16)[name = string("q_47_cast_fp16")]; tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = k_43_cast_fp16)[name = string("transpose_90")]; tensor var_2958_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_3_cast_fp16)[name = string("op_2958_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_45_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_45_cast_fp16)[name = string("x2_23_cast_fp16")]; fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2979_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_2979_cast_fp16")]; int32 var_2981 = const()[name = string("op_2981"), val = int32(-1)]; bool var_2982_interleave_0 = const()[name = string("op_2982_interleave_0"), val = bool(false)]; tensor var_2982_cast_fp16 = concat(axis = var_2981, interleave = var_2982_interleave_0, values = (var_2979_cast_fp16, x1_23_cast_fp16))[name = string("op_2982_cast_fp16")]; tensor var_2983_cast_fp16 = mul(x = var_2982_cast_fp16, y = sin_3_cast_fp16)[name = string("op_2983_cast_fp16")]; tensor k_47_cast_fp16 = add(x = var_2958_cast_fp16, y = var_2983_cast_fp16)[name = string("k_47_cast_fp16")]; tensor var_2990 = const()[name = string("op_2990"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_11_cast_fp16 = reshape(shape = var_2990, x = k_47_cast_fp16)[name = string("nk_flat_11_cast_fp16")]; tensor var_2996 = const()[name = string("op_2996"), val = tensor([1, 1024, 1, 1])]; tensor v_35_cast_fp16 = transpose(perm = v_35_perm_0, x = v_33_cast_fp16)[name = string("transpose_89")]; tensor nv_flat_11_cast_fp16 = reshape(shape = var_2996, x = v_35_cast_fp16)[name = string("nv_flat_11_cast_fp16")]; tensor var_3005_cast_fp16 = mul(x = var_2799_cast_fp16, y = var_1194_cast_fp16)[name = string("op_3005_cast_fp16")]; tensor var_3006_cast_fp16 = mul(x = nk_flat_11_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_3006_cast_fp16")]; tensor key_cache_25_cast_fp16 = add(x = var_3005_cast_fp16, y = var_3006_cast_fp16)[name = string("key_cache_25_cast_fp16")]; tensor var_3012_cast_fp16 = mul(x = var_2819_cast_fp16, y = var_1194_cast_fp16)[name = string("op_3012_cast_fp16")]; tensor var_3013_cast_fp16 = mul(x = nv_flat_11_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_3013_cast_fp16")]; tensor value_cache_25_cast_fp16 = add(x = var_3012_cast_fp16, y = var_3013_cast_fp16)[name = string("value_cache_25_cast_fp16")]; tensor kc_31_axes_0 = const()[name = string("kc_31_axes_0"), val = tensor([2])]; tensor kc_31_cast_fp16 = squeeze(axes = kc_31_axes_0, x = key_cache_25_cast_fp16)[name = string("kc_31_cast_fp16")]; tensor var_3022 = const()[name = string("op_3022"), val = tensor([1, 8, 128, 256])]; tensor kc_33_cast_fp16 = reshape(shape = var_3022, x = kc_31_cast_fp16)[name = string("kc_33_cast_fp16")]; tensor vc_31_axes_0 = const()[name = string("vc_31_axes_0"), val = tensor([2])]; tensor vc_31_cast_fp16 = squeeze(axes = vc_31_axes_0, x = value_cache_25_cast_fp16)[name = string("vc_31_cast_fp16")]; tensor var_3030 = const()[name = string("op_3030"), val = tensor([1, 8, 128, 256])]; tensor vc_33_cast_fp16 = reshape(shape = var_3030, x = vc_31_cast_fp16)[name = string("vc_33_cast_fp16")]; tensor var_3033_axes_0 = const()[name = string("op_3033_axes_0"), val = tensor([2])]; tensor var_3033_cast_fp16 = expand_dims(axes = var_3033_axes_0, x = kc_33_cast_fp16)[name = string("op_3033_cast_fp16")]; tensor var_3041_reps_0 = const()[name = string("op_3041_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3041_cast_fp16 = tile(reps = var_3041_reps_0, x = var_3033_cast_fp16)[name = string("op_3041_cast_fp16")]; tensor var_3046 = const()[name = string("op_3046"), val = tensor([1, 16, 128, 256])]; tensor kc_35_cast_fp16 = reshape(shape = var_3046, x = var_3041_cast_fp16)[name = string("kc_35_cast_fp16")]; tensor var_3049_axes_0 = const()[name = string("op_3049_axes_0"), val = tensor([2])]; tensor var_3049_cast_fp16 = expand_dims(axes = var_3049_axes_0, x = vc_33_cast_fp16)[name = string("op_3049_cast_fp16")]; tensor var_3057_reps_0 = const()[name = string("op_3057_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3057_cast_fp16 = tile(reps = var_3057_reps_0, x = var_3049_cast_fp16)[name = string("op_3057_cast_fp16")]; tensor var_3062 = const()[name = string("op_3062"), val = tensor([1, 16, 128, 256])]; tensor vc_35_cast_fp16 = reshape(shape = var_3062, x = var_3057_cast_fp16)[name = string("vc_35_cast_fp16")]; bool var_3064_transpose_x_0 = const()[name = string("op_3064_transpose_x_0"), val = bool(false)]; bool var_3064_transpose_y_0 = const()[name = string("op_3064_transpose_y_0"), val = bool(false)]; tensor var_3064_cast_fp16 = matmul(transpose_x = var_3064_transpose_x_0, transpose_y = var_3064_transpose_y_0, x = q_47_cast_fp16, y = kc_35_cast_fp16)[name = string("op_3064_cast_fp16")]; fp16 _inversed_attn_weights_41_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_41_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_41_cast_fp16 = mul(x = var_3064_cast_fp16, y = _inversed_attn_weights_41_y_0_to_fp16)[name = string("_inversed_attn_weights_41_cast_fp16")]; tensor attn_weights_43_cast_fp16 = add(x = _inversed_attn_weights_41_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_43_cast_fp16")]; int32 var_3078 = const()[name = string("op_3078"), val = int32(-1)]; tensor attn_weights_47_cast_fp16 = softmax(axis = var_3078, x = attn_weights_43_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; bool attn_output_21_transpose_x_1 = const()[name = string("attn_output_21_transpose_x_1"), val = bool(false)]; bool attn_output_21_transpose_y_1 = const()[name = string("attn_output_21_transpose_y_1"), val = bool(true)]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_1, transpose_y = attn_output_21_transpose_y_1, x = attn_weights_47_cast_fp16, y = vc_35_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_3087_perm_0 = const()[name = string("op_3087_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3091 = const()[name = string("op_3091"), val = tensor([1, 1, -1])]; tensor var_3087_cast_fp16 = transpose(perm = var_3087_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_88")]; tensor input_53_cast_fp16 = reshape(shape = var_3091, x = var_3087_cast_fp16)[name = string("input_53_cast_fp16")]; tensor layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84998848))))[name = string("layers_5_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor var_3097_axes_0 = const()[name = string("op_3097_axes_0"), val = tensor([0])]; tensor var_3097_cast_fp16 = squeeze(axes = var_3097_axes_0, x = linear_38_cast_fp16)[name = string("op_3097_cast_fp16")]; tensor var_3099_axes_0 = const()[name = string("op_3099_axes_0"), val = tensor([0])]; tensor var_3099_cast_fp16 = squeeze(axes = var_3099_axes_0, x = var_3097_cast_fp16)[name = string("op_3099_cast_fp16")]; tensor var_3101_axes_0 = const()[name = string("op_3101_axes_0"), val = tensor([-1])]; tensor var_3101_cast_fp16 = expand_dims(axes = var_3101_axes_0, x = var_3099_cast_fp16)[name = string("op_3101_cast_fp16")]; tensor attn_4d_11_axes_0 = const()[name = string("attn_4d_11_axes_0"), val = tensor([-1])]; tensor attn_4d_11_cast_fp16 = expand_dims(axes = attn_4d_11_axes_0, x = var_3101_cast_fp16)[name = string("attn_4d_11_cast_fp16")]; tensor hidden_21_cast_fp16 = add(x = hidden_19_cast_fp16, y = attn_4d_11_cast_fp16)[name = string("hidden_21_cast_fp16")]; tensor var_3107_axes_0 = const()[name = string("op_3107_axes_0"), val = tensor([-1])]; tensor var_3107_cast_fp16 = squeeze(axes = var_3107_axes_0, x = hidden_21_cast_fp16)[name = string("op_3107_cast_fp16")]; tensor var_3109_axes_0 = const()[name = string("op_3109_axes_0"), val = tensor([-1])]; tensor var_3109_cast_fp16 = squeeze(axes = var_3109_axes_0, x = var_3107_cast_fp16)[name = string("op_3109_cast_fp16")]; tensor hidden_states_139_axes_0 = const()[name = string("hidden_states_139_axes_0"), val = tensor([0])]; tensor hidden_states_139_cast_fp16 = expand_dims(axes = hidden_states_139_axes_0, x = var_3109_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; fp16 var_3115_promoted_to_fp16 = const()[name = string("op_3115_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3121_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_3115_promoted_to_fp16)[name = string("op_3121_cast_fp16")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_3121_cast_fp16)[name = string("variance_47_cast_fp16")]; fp16 var_3124_to_fp16 = const()[name = string("op_3124_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3125_cast_fp16 = add(x = variance_47_cast_fp16, y = var_3124_to_fp16)[name = string("op_3125_cast_fp16")]; fp32 var_3126_epsilon_0 = const()[name = string("op_3126_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3126_cast_fp16 = rsqrt(epsilon = var_3126_epsilon_0, x = var_3125_cast_fp16)[name = string("op_3126_cast_fp16")]; tensor hidden_states_143_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_3126_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; tensor const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84999424)))]; tensor input_55_cast_fp16 = mul(x = const_60_to_fp16, y = hidden_states_143_cast_fp16)[name = string("input_55_cast_fp16")]; tensor layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85001536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147328))))[name = string("layers_5_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_3136_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_3136_cast_fp16")]; tensor layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91293696))))[name = string("layers_5_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_up_proj_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_59_cast_fp16 = mul(x = var_3136_cast_fp16, y = linear_40_cast_fp16)[name = string("input_59_cast_fp16")]; tensor layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91294272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440064))))[name = string("layers_5_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor var_3143_axes_0 = const()[name = string("op_3143_axes_0"), val = tensor([0])]; tensor var_3143_cast_fp16 = squeeze(axes = var_3143_axes_0, x = linear_41_cast_fp16)[name = string("op_3143_cast_fp16")]; tensor var_3145_axes_0 = const()[name = string("op_3145_axes_0"), val = tensor([0])]; tensor var_3145_cast_fp16 = squeeze(axes = var_3145_axes_0, x = var_3143_cast_fp16)[name = string("op_3145_cast_fp16")]; tensor var_3147_axes_0 = const()[name = string("op_3147_axes_0"), val = tensor([-1])]; tensor var_3147_cast_fp16 = expand_dims(axes = var_3147_axes_0, x = var_3145_cast_fp16)[name = string("op_3147_cast_fp16")]; tensor mlp_4d_11_axes_0 = const()[name = string("mlp_4d_11_axes_0"), val = tensor([-1])]; tensor mlp_4d_11_cast_fp16 = expand_dims(axes = mlp_4d_11_axes_0, x = var_3147_cast_fp16)[name = string("mlp_4d_11_cast_fp16")]; tensor hidden_23_cast_fp16 = add(x = hidden_21_cast_fp16, y = mlp_4d_11_cast_fp16)[name = string("hidden_23_cast_fp16")]; tensor var_3161_begin_0 = const()[name = string("op_3161_begin_0"), val = tensor([0, 6144, 0, 0])]; tensor var_3161_end_0 = const()[name = string("op_3161_end_0"), val = tensor([1, 7168, 1, 256])]; tensor var_3161_end_mask_0 = const()[name = string("op_3161_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3161_cast_fp16 = slice_by_index(begin = var_3161_begin_0, end = var_3161_end_0, end_mask = var_3161_end_mask_0, x = key_cache)[name = string("op_3161_cast_fp16")]; tensor var_3181_begin_0 = const()[name = string("op_3181_begin_0"), val = tensor([0, 6144, 0, 0])]; tensor var_3181_end_0 = const()[name = string("op_3181_end_0"), val = tensor([1, 7168, 1, 256])]; tensor var_3181_end_mask_0 = const()[name = string("op_3181_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3181_cast_fp16 = slice_by_index(begin = var_3181_begin_0, end = var_3181_end_0, end_mask = var_3181_end_mask_0, x = value_cache)[name = string("op_3181_cast_fp16")]; tensor var_3193_axes_0 = const()[name = string("op_3193_axes_0"), val = tensor([-1])]; tensor var_3193_cast_fp16 = squeeze(axes = var_3193_axes_0, x = hidden_23_cast_fp16)[name = string("op_3193_cast_fp16")]; tensor var_3195_axes_0 = const()[name = string("op_3195_axes_0"), val = tensor([-1])]; tensor var_3195_cast_fp16 = squeeze(axes = var_3195_axes_0, x = var_3193_cast_fp16)[name = string("op_3195_cast_fp16")]; tensor hidden_states_145_axes_0 = const()[name = string("hidden_states_145_axes_0"), val = tensor([0])]; tensor hidden_states_145_cast_fp16 = expand_dims(axes = hidden_states_145_axes_0, x = var_3195_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; fp16 var_3201_promoted_to_fp16 = const()[name = string("op_3201_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3207_cast_fp16 = pow(x = hidden_states_145_cast_fp16, y = var_3201_promoted_to_fp16)[name = string("op_3207_cast_fp16")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_3207_cast_fp16)[name = string("variance_49_cast_fp16")]; fp16 var_3210_to_fp16 = const()[name = string("op_3210_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3211_cast_fp16 = add(x = variance_49_cast_fp16, y = var_3210_to_fp16)[name = string("op_3211_cast_fp16")]; fp32 var_3212_epsilon_0 = const()[name = string("op_3212_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3212_cast_fp16 = rsqrt(epsilon = var_3212_epsilon_0, x = var_3211_cast_fp16)[name = string("op_3212_cast_fp16")]; tensor hidden_states_149_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = var_3212_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; tensor const_61_to_fp16 = const()[name = string("const_61_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440640)))]; tensor input_61_cast_fp16 = mul(x = const_61_to_fp16, y = hidden_states_149_cast_fp16)[name = string("input_61_cast_fp16")]; tensor layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94442752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96539968))))[name = string("layers_6_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96540544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589184))))[name = string("layers_6_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638400))))[name = string("layers_6_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor var_3229 = const()[name = string("op_3229"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_151_cast_fp16 = reshape(shape = var_3229, x = linear_42_cast_fp16)[name = string("hidden_states_151_cast_fp16")]; tensor var_3235 = const()[name = string("op_3235"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_157_cast_fp16 = reshape(shape = var_3235, x = linear_43_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; tensor var_3241 = const()[name = string("op_3241"), val = tensor([1, 1, 8, 128])]; tensor v_39_cast_fp16 = reshape(shape = var_3241, x = linear_44_cast_fp16)[name = string("v_39_cast_fp16")]; fp16 var_3246_promoted_to_fp16 = const()[name = string("op_3246_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3252_cast_fp16 = pow(x = hidden_states_151_cast_fp16, y = var_3246_promoted_to_fp16)[name = string("op_3252_cast_fp16")]; tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_3252_cast_fp16)[name = string("variance_51_cast_fp16")]; fp16 var_3255_to_fp16 = const()[name = string("op_3255_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3256_cast_fp16 = add(x = variance_51_cast_fp16, y = var_3255_to_fp16)[name = string("op_3256_cast_fp16")]; fp32 var_3257_epsilon_0 = const()[name = string("op_3257_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3257_cast_fp16 = rsqrt(epsilon = var_3257_epsilon_0, x = var_3256_cast_fp16)[name = string("op_3257_cast_fp16")]; tensor hidden_states_155_cast_fp16 = mul(x = hidden_states_151_cast_fp16, y = var_3257_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; tensor const_62_to_fp16 = const()[name = string("const_62_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638976)))]; tensor q_51_cast_fp16 = mul(x = const_62_to_fp16, y = hidden_states_155_cast_fp16)[name = string("q_51_cast_fp16")]; fp16 var_3264_promoted_to_fp16 = const()[name = string("op_3264_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3270_cast_fp16 = pow(x = hidden_states_157_cast_fp16, y = var_3264_promoted_to_fp16)[name = string("op_3270_cast_fp16")]; tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_3270_cast_fp16)[name = string("variance_53_cast_fp16")]; fp16 var_3273_to_fp16 = const()[name = string("op_3273_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3274_cast_fp16 = add(x = variance_53_cast_fp16, y = var_3273_to_fp16)[name = string("op_3274_cast_fp16")]; fp32 var_3275_epsilon_0 = const()[name = string("op_3275_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3275_cast_fp16 = rsqrt(epsilon = var_3275_epsilon_0, x = var_3274_cast_fp16)[name = string("op_3275_cast_fp16")]; tensor hidden_states_161_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = var_3275_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor const_63_to_fp16 = const()[name = string("const_63_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639296)))]; tensor k_51_cast_fp16 = mul(x = const_63_to_fp16, y = hidden_states_161_cast_fp16)[name = string("k_51_cast_fp16")]; tensor q_53_perm_0 = const()[name = string("q_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_53_perm_0 = const()[name = string("k_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_41_perm_0 = const()[name = string("v_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_53_cast_fp16 = transpose(perm = q_53_perm_0, x = q_51_cast_fp16)[name = string("transpose_87")]; tensor var_3292_cast_fp16 = mul(x = q_53_cast_fp16, y = cos_3_cast_fp16)[name = string("op_3292_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_53_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_53_cast_fp16)[name = string("x2_25_cast_fp16")]; fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3313_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_3313_cast_fp16")]; int32 var_3315 = const()[name = string("op_3315"), val = int32(-1)]; bool var_3316_interleave_0 = const()[name = string("op_3316_interleave_0"), val = bool(false)]; tensor var_3316_cast_fp16 = concat(axis = var_3315, interleave = var_3316_interleave_0, values = (var_3313_cast_fp16, x1_25_cast_fp16))[name = string("op_3316_cast_fp16")]; tensor var_3317_cast_fp16 = mul(x = var_3316_cast_fp16, y = sin_3_cast_fp16)[name = string("op_3317_cast_fp16")]; tensor q_55_cast_fp16 = add(x = var_3292_cast_fp16, y = var_3317_cast_fp16)[name = string("q_55_cast_fp16")]; tensor k_53_cast_fp16 = transpose(perm = k_53_perm_0, x = k_51_cast_fp16)[name = string("transpose_86")]; tensor var_3320_cast_fp16 = mul(x = k_53_cast_fp16, y = cos_3_cast_fp16)[name = string("op_3320_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_53_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_53_cast_fp16)[name = string("x2_27_cast_fp16")]; fp16 const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3341_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_69_promoted_to_fp16)[name = string("op_3341_cast_fp16")]; int32 var_3343 = const()[name = string("op_3343"), val = int32(-1)]; bool var_3344_interleave_0 = const()[name = string("op_3344_interleave_0"), val = bool(false)]; tensor var_3344_cast_fp16 = concat(axis = var_3343, interleave = var_3344_interleave_0, values = (var_3341_cast_fp16, x1_27_cast_fp16))[name = string("op_3344_cast_fp16")]; tensor var_3345_cast_fp16 = mul(x = var_3344_cast_fp16, y = sin_3_cast_fp16)[name = string("op_3345_cast_fp16")]; tensor k_55_cast_fp16 = add(x = var_3320_cast_fp16, y = var_3345_cast_fp16)[name = string("k_55_cast_fp16")]; tensor var_3352 = const()[name = string("op_3352"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_13_cast_fp16 = reshape(shape = var_3352, x = k_55_cast_fp16)[name = string("nk_flat_13_cast_fp16")]; tensor var_3358 = const()[name = string("op_3358"), val = tensor([1, 1024, 1, 1])]; tensor v_41_cast_fp16 = transpose(perm = v_41_perm_0, x = v_39_cast_fp16)[name = string("transpose_85")]; tensor nv_flat_13_cast_fp16 = reshape(shape = var_3358, x = v_41_cast_fp16)[name = string("nv_flat_13_cast_fp16")]; tensor var_3367_cast_fp16 = mul(x = var_3161_cast_fp16, y = var_1194_cast_fp16)[name = string("op_3367_cast_fp16")]; tensor var_3368_cast_fp16 = mul(x = nk_flat_13_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_3368_cast_fp16")]; tensor key_cache_29_cast_fp16 = add(x = var_3367_cast_fp16, y = var_3368_cast_fp16)[name = string("key_cache_29_cast_fp16")]; tensor var_3374_cast_fp16 = mul(x = var_3181_cast_fp16, y = var_1194_cast_fp16)[name = string("op_3374_cast_fp16")]; tensor var_3375_cast_fp16 = mul(x = nv_flat_13_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_3375_cast_fp16")]; tensor value_cache_29_cast_fp16 = add(x = var_3374_cast_fp16, y = var_3375_cast_fp16)[name = string("value_cache_29_cast_fp16")]; tensor kc_37_axes_0 = const()[name = string("kc_37_axes_0"), val = tensor([2])]; tensor kc_37_cast_fp16 = squeeze(axes = kc_37_axes_0, x = key_cache_29_cast_fp16)[name = string("kc_37_cast_fp16")]; tensor var_3384 = const()[name = string("op_3384"), val = tensor([1, 8, 128, 256])]; tensor kc_39_cast_fp16 = reshape(shape = var_3384, x = kc_37_cast_fp16)[name = string("kc_39_cast_fp16")]; tensor vc_37_axes_0 = const()[name = string("vc_37_axes_0"), val = tensor([2])]; tensor vc_37_cast_fp16 = squeeze(axes = vc_37_axes_0, x = value_cache_29_cast_fp16)[name = string("vc_37_cast_fp16")]; tensor var_3392 = const()[name = string("op_3392"), val = tensor([1, 8, 128, 256])]; tensor vc_39_cast_fp16 = reshape(shape = var_3392, x = vc_37_cast_fp16)[name = string("vc_39_cast_fp16")]; tensor var_3395_axes_0 = const()[name = string("op_3395_axes_0"), val = tensor([2])]; tensor var_3395_cast_fp16 = expand_dims(axes = var_3395_axes_0, x = kc_39_cast_fp16)[name = string("op_3395_cast_fp16")]; tensor var_3403_reps_0 = const()[name = string("op_3403_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3403_cast_fp16 = tile(reps = var_3403_reps_0, x = var_3395_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor var_3408 = const()[name = string("op_3408"), val = tensor([1, 16, 128, 256])]; tensor kc_41_cast_fp16 = reshape(shape = var_3408, x = var_3403_cast_fp16)[name = string("kc_41_cast_fp16")]; tensor var_3411_axes_0 = const()[name = string("op_3411_axes_0"), val = tensor([2])]; tensor var_3411_cast_fp16 = expand_dims(axes = var_3411_axes_0, x = vc_39_cast_fp16)[name = string("op_3411_cast_fp16")]; tensor var_3419_reps_0 = const()[name = string("op_3419_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3419_cast_fp16 = tile(reps = var_3419_reps_0, x = var_3411_cast_fp16)[name = string("op_3419_cast_fp16")]; tensor var_3424 = const()[name = string("op_3424"), val = tensor([1, 16, 128, 256])]; tensor vc_41_cast_fp16 = reshape(shape = var_3424, x = var_3419_cast_fp16)[name = string("vc_41_cast_fp16")]; bool var_3426_transpose_x_0 = const()[name = string("op_3426_transpose_x_0"), val = bool(false)]; bool var_3426_transpose_y_0 = const()[name = string("op_3426_transpose_y_0"), val = bool(false)]; tensor var_3426_cast_fp16 = matmul(transpose_x = var_3426_transpose_x_0, transpose_y = var_3426_transpose_y_0, x = q_55_cast_fp16, y = kc_41_cast_fp16)[name = string("op_3426_cast_fp16")]; fp16 _inversed_attn_weights_49_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_49_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_49_cast_fp16 = mul(x = var_3426_cast_fp16, y = _inversed_attn_weights_49_y_0_to_fp16)[name = string("_inversed_attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = _inversed_attn_weights_49_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_51_cast_fp16")]; int32 var_3440 = const()[name = string("op_3440"), val = int32(-1)]; tensor attn_weights_55_cast_fp16 = softmax(axis = var_3440, x = attn_weights_51_cast_fp16)[name = string("attn_weights_55_cast_fp16")]; bool attn_output_25_transpose_x_1 = const()[name = string("attn_output_25_transpose_x_1"), val = bool(false)]; bool attn_output_25_transpose_y_1 = const()[name = string("attn_output_25_transpose_y_1"), val = bool(true)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_1, transpose_y = attn_output_25_transpose_y_1, x = attn_weights_55_cast_fp16, y = vc_41_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_3449_perm_0 = const()[name = string("op_3449_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3453 = const()[name = string("op_3453"), val = tensor([1, 1, -1])]; tensor var_3449_cast_fp16 = transpose(perm = var_3449_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_84")]; tensor input_63_cast_fp16 = reshape(shape = var_3453, x = var_3449_cast_fp16)[name = string("input_63_cast_fp16")]; tensor layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100736832))))[name = string("layers_6_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor var_3459_axes_0 = const()[name = string("op_3459_axes_0"), val = tensor([0])]; tensor var_3459_cast_fp16 = squeeze(axes = var_3459_axes_0, x = linear_45_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3461_axes_0 = const()[name = string("op_3461_axes_0"), val = tensor([0])]; tensor var_3461_cast_fp16 = squeeze(axes = var_3461_axes_0, x = var_3459_cast_fp16)[name = string("op_3461_cast_fp16")]; tensor var_3463_axes_0 = const()[name = string("op_3463_axes_0"), val = tensor([-1])]; tensor var_3463_cast_fp16 = expand_dims(axes = var_3463_axes_0, x = var_3461_cast_fp16)[name = string("op_3463_cast_fp16")]; tensor attn_4d_13_axes_0 = const()[name = string("attn_4d_13_axes_0"), val = tensor([-1])]; tensor attn_4d_13_cast_fp16 = expand_dims(axes = attn_4d_13_axes_0, x = var_3463_cast_fp16)[name = string("attn_4d_13_cast_fp16")]; tensor hidden_25_cast_fp16 = add(x = hidden_23_cast_fp16, y = attn_4d_13_cast_fp16)[name = string("hidden_25_cast_fp16")]; tensor var_3469_axes_0 = const()[name = string("op_3469_axes_0"), val = tensor([-1])]; tensor var_3469_cast_fp16 = squeeze(axes = var_3469_axes_0, x = hidden_25_cast_fp16)[name = string("op_3469_cast_fp16")]; tensor var_3471_axes_0 = const()[name = string("op_3471_axes_0"), val = tensor([-1])]; tensor var_3471_cast_fp16 = squeeze(axes = var_3471_axes_0, x = var_3469_cast_fp16)[name = string("op_3471_cast_fp16")]; tensor hidden_states_163_axes_0 = const()[name = string("hidden_states_163_axes_0"), val = tensor([0])]; tensor hidden_states_163_cast_fp16 = expand_dims(axes = hidden_states_163_axes_0, x = var_3471_cast_fp16)[name = string("hidden_states_163_cast_fp16")]; fp16 var_3477_promoted_to_fp16 = const()[name = string("op_3477_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3483_cast_fp16 = pow(x = hidden_states_163_cast_fp16, y = var_3477_promoted_to_fp16)[name = string("op_3483_cast_fp16")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_3483_cast_fp16)[name = string("variance_55_cast_fp16")]; fp16 var_3486_to_fp16 = const()[name = string("op_3486_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3487_cast_fp16 = add(x = variance_55_cast_fp16, y = var_3486_to_fp16)[name = string("op_3487_cast_fp16")]; fp32 var_3488_epsilon_0 = const()[name = string("op_3488_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3488_cast_fp16 = rsqrt(epsilon = var_3488_epsilon_0, x = var_3487_cast_fp16)[name = string("op_3488_cast_fp16")]; tensor hidden_states_167_cast_fp16 = mul(x = hidden_states_163_cast_fp16, y = var_3488_cast_fp16)[name = string("hidden_states_167_cast_fp16")]; tensor const_70_to_fp16 = const()[name = string("const_70_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100737408)))]; tensor input_65_cast_fp16 = mul(x = const_70_to_fp16, y = hidden_states_167_cast_fp16)[name = string("input_65_cast_fp16")]; tensor layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100739520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885312))))[name = string("layers_6_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_3498_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_3498_cast_fp16")]; tensor layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107031680))))[name = string("layers_6_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_up_proj_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_69_cast_fp16 = mul(x = var_3498_cast_fp16, y = linear_47_cast_fp16)[name = string("input_69_cast_fp16")]; tensor layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107032256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178048))))[name = string("layers_6_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor var_3505_axes_0 = const()[name = string("op_3505_axes_0"), val = tensor([0])]; tensor var_3505_cast_fp16 = squeeze(axes = var_3505_axes_0, x = linear_48_cast_fp16)[name = string("op_3505_cast_fp16")]; tensor var_3507_axes_0 = const()[name = string("op_3507_axes_0"), val = tensor([0])]; tensor var_3507_cast_fp16 = squeeze(axes = var_3507_axes_0, x = var_3505_cast_fp16)[name = string("op_3507_cast_fp16")]; tensor var_3509_axes_0 = const()[name = string("op_3509_axes_0"), val = tensor([-1])]; tensor var_3509_cast_fp16 = expand_dims(axes = var_3509_axes_0, x = var_3507_cast_fp16)[name = string("op_3509_cast_fp16")]; tensor mlp_4d_13_axes_0 = const()[name = string("mlp_4d_13_axes_0"), val = tensor([-1])]; tensor mlp_4d_13_cast_fp16 = expand_dims(axes = mlp_4d_13_axes_0, x = var_3509_cast_fp16)[name = string("mlp_4d_13_cast_fp16")]; tensor hidden_27_cast_fp16 = add(x = hidden_25_cast_fp16, y = mlp_4d_13_cast_fp16)[name = string("hidden_27_cast_fp16")]; tensor var_3523_begin_0 = const()[name = string("op_3523_begin_0"), val = tensor([0, 7168, 0, 0])]; tensor var_3523_end_0 = const()[name = string("op_3523_end_0"), val = tensor([1, 8192, 1, 256])]; tensor var_3523_end_mask_0 = const()[name = string("op_3523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3523_cast_fp16 = slice_by_index(begin = var_3523_begin_0, end = var_3523_end_0, end_mask = var_3523_end_mask_0, x = key_cache)[name = string("op_3523_cast_fp16")]; tensor var_3543_begin_0 = const()[name = string("op_3543_begin_0"), val = tensor([0, 7168, 0, 0])]; tensor var_3543_end_0 = const()[name = string("op_3543_end_0"), val = tensor([1, 8192, 1, 256])]; tensor var_3543_end_mask_0 = const()[name = string("op_3543_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3543_cast_fp16 = slice_by_index(begin = var_3543_begin_0, end = var_3543_end_0, end_mask = var_3543_end_mask_0, x = value_cache)[name = string("op_3543_cast_fp16")]; tensor var_3555_axes_0 = const()[name = string("op_3555_axes_0"), val = tensor([-1])]; tensor var_3555_cast_fp16 = squeeze(axes = var_3555_axes_0, x = hidden_27_cast_fp16)[name = string("op_3555_cast_fp16")]; tensor var_3557_axes_0 = const()[name = string("op_3557_axes_0"), val = tensor([-1])]; tensor var_3557_cast_fp16 = squeeze(axes = var_3557_axes_0, x = var_3555_cast_fp16)[name = string("op_3557_cast_fp16")]; tensor hidden_states_169_axes_0 = const()[name = string("hidden_states_169_axes_0"), val = tensor([0])]; tensor hidden_states_169_cast_fp16 = expand_dims(axes = hidden_states_169_axes_0, x = var_3557_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; fp16 var_3563_promoted_to_fp16 = const()[name = string("op_3563_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3569_cast_fp16 = pow(x = hidden_states_169_cast_fp16, y = var_3563_promoted_to_fp16)[name = string("op_3569_cast_fp16")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_3569_cast_fp16)[name = string("variance_57_cast_fp16")]; fp16 var_3572_to_fp16 = const()[name = string("op_3572_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3573_cast_fp16 = add(x = variance_57_cast_fp16, y = var_3572_to_fp16)[name = string("op_3573_cast_fp16")]; fp32 var_3574_epsilon_0 = const()[name = string("op_3574_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3574_cast_fp16 = rsqrt(epsilon = var_3574_epsilon_0, x = var_3573_cast_fp16)[name = string("op_3574_cast_fp16")]; tensor hidden_states_173_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = var_3574_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; tensor const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178624)))]; tensor input_71_cast_fp16 = mul(x = const_71_to_fp16, y = hidden_states_173_cast_fp16)[name = string("input_71_cast_fp16")]; tensor layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112277952))))[name = string("layers_7_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112278528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327168))))[name = string("layers_7_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376384))))[name = string("layers_7_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor var_3591 = const()[name = string("op_3591"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_175_cast_fp16 = reshape(shape = var_3591, x = linear_49_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; tensor var_3597 = const()[name = string("op_3597"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_181_cast_fp16 = reshape(shape = var_3597, x = linear_50_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; tensor var_3603 = const()[name = string("op_3603"), val = tensor([1, 1, 8, 128])]; tensor v_45_cast_fp16 = reshape(shape = var_3603, x = linear_51_cast_fp16)[name = string("v_45_cast_fp16")]; fp16 var_3608_promoted_to_fp16 = const()[name = string("op_3608_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3614_cast_fp16 = pow(x = hidden_states_175_cast_fp16, y = var_3608_promoted_to_fp16)[name = string("op_3614_cast_fp16")]; tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_3614_cast_fp16)[name = string("variance_59_cast_fp16")]; fp16 var_3617_to_fp16 = const()[name = string("op_3617_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3618_cast_fp16 = add(x = variance_59_cast_fp16, y = var_3617_to_fp16)[name = string("op_3618_cast_fp16")]; fp32 var_3619_epsilon_0 = const()[name = string("op_3619_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3619_cast_fp16 = rsqrt(epsilon = var_3619_epsilon_0, x = var_3618_cast_fp16)[name = string("op_3619_cast_fp16")]; tensor hidden_states_179_cast_fp16 = mul(x = hidden_states_175_cast_fp16, y = var_3619_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; tensor const_72_to_fp16 = const()[name = string("const_72_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376960)))]; tensor q_59_cast_fp16 = mul(x = const_72_to_fp16, y = hidden_states_179_cast_fp16)[name = string("q_59_cast_fp16")]; fp16 var_3626_promoted_to_fp16 = const()[name = string("op_3626_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3632_cast_fp16 = pow(x = hidden_states_181_cast_fp16, y = var_3626_promoted_to_fp16)[name = string("op_3632_cast_fp16")]; tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_3632_cast_fp16)[name = string("variance_61_cast_fp16")]; fp16 var_3635_to_fp16 = const()[name = string("op_3635_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3636_cast_fp16 = add(x = variance_61_cast_fp16, y = var_3635_to_fp16)[name = string("op_3636_cast_fp16")]; fp32 var_3637_epsilon_0 = const()[name = string("op_3637_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3637_cast_fp16 = rsqrt(epsilon = var_3637_epsilon_0, x = var_3636_cast_fp16)[name = string("op_3637_cast_fp16")]; tensor hidden_states_185_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = var_3637_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; tensor const_73_to_fp16 = const()[name = string("const_73_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377280)))]; tensor k_59_cast_fp16 = mul(x = const_73_to_fp16, y = hidden_states_185_cast_fp16)[name = string("k_59_cast_fp16")]; tensor q_61_perm_0 = const()[name = string("q_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_61_perm_0 = const()[name = string("k_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_47_perm_0 = const()[name = string("v_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_61_cast_fp16 = transpose(perm = q_61_perm_0, x = q_59_cast_fp16)[name = string("transpose_83")]; tensor var_3654_cast_fp16 = mul(x = q_61_cast_fp16, y = cos_3_cast_fp16)[name = string("op_3654_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_61_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_61_cast_fp16)[name = string("x2_29_cast_fp16")]; fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3675_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_3675_cast_fp16")]; int32 var_3677 = const()[name = string("op_3677"), val = int32(-1)]; bool var_3678_interleave_0 = const()[name = string("op_3678_interleave_0"), val = bool(false)]; tensor var_3678_cast_fp16 = concat(axis = var_3677, interleave = var_3678_interleave_0, values = (var_3675_cast_fp16, x1_29_cast_fp16))[name = string("op_3678_cast_fp16")]; tensor var_3679_cast_fp16 = mul(x = var_3678_cast_fp16, y = sin_3_cast_fp16)[name = string("op_3679_cast_fp16")]; tensor q_63_cast_fp16 = add(x = var_3654_cast_fp16, y = var_3679_cast_fp16)[name = string("q_63_cast_fp16")]; tensor k_61_cast_fp16 = transpose(perm = k_61_perm_0, x = k_59_cast_fp16)[name = string("transpose_82")]; tensor var_3682_cast_fp16 = mul(x = k_61_cast_fp16, y = cos_3_cast_fp16)[name = string("op_3682_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_61_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_61_cast_fp16)[name = string("x2_31_cast_fp16")]; fp16 const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3703_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_79_promoted_to_fp16)[name = string("op_3703_cast_fp16")]; int32 var_3705 = const()[name = string("op_3705"), val = int32(-1)]; bool var_3706_interleave_0 = const()[name = string("op_3706_interleave_0"), val = bool(false)]; tensor var_3706_cast_fp16 = concat(axis = var_3705, interleave = var_3706_interleave_0, values = (var_3703_cast_fp16, x1_31_cast_fp16))[name = string("op_3706_cast_fp16")]; tensor var_3707_cast_fp16 = mul(x = var_3706_cast_fp16, y = sin_3_cast_fp16)[name = string("op_3707_cast_fp16")]; tensor k_63_cast_fp16 = add(x = var_3682_cast_fp16, y = var_3707_cast_fp16)[name = string("k_63_cast_fp16")]; tensor var_3714 = const()[name = string("op_3714"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_15_cast_fp16 = reshape(shape = var_3714, x = k_63_cast_fp16)[name = string("nk_flat_15_cast_fp16")]; tensor var_3720 = const()[name = string("op_3720"), val = tensor([1, 1024, 1, 1])]; tensor v_47_cast_fp16 = transpose(perm = v_47_perm_0, x = v_45_cast_fp16)[name = string("transpose_81")]; tensor nv_flat_15_cast_fp16 = reshape(shape = var_3720, x = v_47_cast_fp16)[name = string("nv_flat_15_cast_fp16")]; tensor var_3729_cast_fp16 = mul(x = var_3523_cast_fp16, y = var_1194_cast_fp16)[name = string("op_3729_cast_fp16")]; tensor var_3730_cast_fp16 = mul(x = nk_flat_15_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_3730_cast_fp16")]; tensor key_cache_33_cast_fp16 = add(x = var_3729_cast_fp16, y = var_3730_cast_fp16)[name = string("key_cache_33_cast_fp16")]; tensor var_3736_cast_fp16 = mul(x = var_3543_cast_fp16, y = var_1194_cast_fp16)[name = string("op_3736_cast_fp16")]; tensor var_3737_cast_fp16 = mul(x = nv_flat_15_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_3737_cast_fp16")]; tensor value_cache_33_cast_fp16 = add(x = var_3736_cast_fp16, y = var_3737_cast_fp16)[name = string("value_cache_33_cast_fp16")]; tensor kc_43_axes_0 = const()[name = string("kc_43_axes_0"), val = tensor([2])]; tensor kc_43_cast_fp16 = squeeze(axes = kc_43_axes_0, x = key_cache_33_cast_fp16)[name = string("kc_43_cast_fp16")]; tensor var_3746 = const()[name = string("op_3746"), val = tensor([1, 8, 128, 256])]; tensor kc_45_cast_fp16 = reshape(shape = var_3746, x = kc_43_cast_fp16)[name = string("kc_45_cast_fp16")]; tensor vc_43_axes_0 = const()[name = string("vc_43_axes_0"), val = tensor([2])]; tensor vc_43_cast_fp16 = squeeze(axes = vc_43_axes_0, x = value_cache_33_cast_fp16)[name = string("vc_43_cast_fp16")]; tensor var_3754 = const()[name = string("op_3754"), val = tensor([1, 8, 128, 256])]; tensor vc_45_cast_fp16 = reshape(shape = var_3754, x = vc_43_cast_fp16)[name = string("vc_45_cast_fp16")]; tensor var_3757_axes_0 = const()[name = string("op_3757_axes_0"), val = tensor([2])]; tensor var_3757_cast_fp16 = expand_dims(axes = var_3757_axes_0, x = kc_45_cast_fp16)[name = string("op_3757_cast_fp16")]; tensor var_3765_reps_0 = const()[name = string("op_3765_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3765_cast_fp16 = tile(reps = var_3765_reps_0, x = var_3757_cast_fp16)[name = string("op_3765_cast_fp16")]; tensor var_3770 = const()[name = string("op_3770"), val = tensor([1, 16, 128, 256])]; tensor kc_47_cast_fp16 = reshape(shape = var_3770, x = var_3765_cast_fp16)[name = string("kc_47_cast_fp16")]; tensor var_3773_axes_0 = const()[name = string("op_3773_axes_0"), val = tensor([2])]; tensor var_3773_cast_fp16 = expand_dims(axes = var_3773_axes_0, x = vc_45_cast_fp16)[name = string("op_3773_cast_fp16")]; tensor var_3781_reps_0 = const()[name = string("op_3781_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3781_cast_fp16 = tile(reps = var_3781_reps_0, x = var_3773_cast_fp16)[name = string("op_3781_cast_fp16")]; tensor var_3786 = const()[name = string("op_3786"), val = tensor([1, 16, 128, 256])]; tensor vc_47_cast_fp16 = reshape(shape = var_3786, x = var_3781_cast_fp16)[name = string("vc_47_cast_fp16")]; bool var_3788_transpose_x_0 = const()[name = string("op_3788_transpose_x_0"), val = bool(false)]; bool var_3788_transpose_y_0 = const()[name = string("op_3788_transpose_y_0"), val = bool(false)]; tensor var_3788_cast_fp16 = matmul(transpose_x = var_3788_transpose_x_0, transpose_y = var_3788_transpose_y_0, x = q_63_cast_fp16, y = kc_47_cast_fp16)[name = string("op_3788_cast_fp16")]; fp16 _inversed_attn_weights_57_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_57_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_57_cast_fp16 = mul(x = var_3788_cast_fp16, y = _inversed_attn_weights_57_y_0_to_fp16)[name = string("_inversed_attn_weights_57_cast_fp16")]; tensor attn_weights_59_cast_fp16 = add(x = _inversed_attn_weights_57_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; int32 var_3802 = const()[name = string("op_3802"), val = int32(-1)]; tensor attn_weights_63_cast_fp16 = softmax(axis = var_3802, x = attn_weights_59_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; bool attn_output_29_transpose_x_1 = const()[name = string("attn_output_29_transpose_x_1"), val = bool(false)]; bool attn_output_29_transpose_y_1 = const()[name = string("attn_output_29_transpose_y_1"), val = bool(true)]; tensor attn_output_29_cast_fp16 = matmul(transpose_x = attn_output_29_transpose_x_1, transpose_y = attn_output_29_transpose_y_1, x = attn_weights_63_cast_fp16, y = vc_47_cast_fp16)[name = string("attn_output_29_cast_fp16")]; tensor var_3811_perm_0 = const()[name = string("op_3811_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3815 = const()[name = string("op_3815"), val = tensor([1, 1, -1])]; tensor var_3811_cast_fp16 = transpose(perm = var_3811_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_80")]; tensor input_73_cast_fp16 = reshape(shape = var_3815, x = var_3811_cast_fp16)[name = string("input_73_cast_fp16")]; tensor layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116474816))))[name = string("layers_7_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor var_3821_axes_0 = const()[name = string("op_3821_axes_0"), val = tensor([0])]; tensor var_3821_cast_fp16 = squeeze(axes = var_3821_axes_0, x = linear_52_cast_fp16)[name = string("op_3821_cast_fp16")]; tensor var_3823_axes_0 = const()[name = string("op_3823_axes_0"), val = tensor([0])]; tensor var_3823_cast_fp16 = squeeze(axes = var_3823_axes_0, x = var_3821_cast_fp16)[name = string("op_3823_cast_fp16")]; tensor var_3825_axes_0 = const()[name = string("op_3825_axes_0"), val = tensor([-1])]; tensor var_3825_cast_fp16 = expand_dims(axes = var_3825_axes_0, x = var_3823_cast_fp16)[name = string("op_3825_cast_fp16")]; tensor attn_4d_15_axes_0 = const()[name = string("attn_4d_15_axes_0"), val = tensor([-1])]; tensor attn_4d_15_cast_fp16 = expand_dims(axes = attn_4d_15_axes_0, x = var_3825_cast_fp16)[name = string("attn_4d_15_cast_fp16")]; tensor hidden_29_cast_fp16 = add(x = hidden_27_cast_fp16, y = attn_4d_15_cast_fp16)[name = string("hidden_29_cast_fp16")]; tensor var_3831_axes_0 = const()[name = string("op_3831_axes_0"), val = tensor([-1])]; tensor var_3831_cast_fp16 = squeeze(axes = var_3831_axes_0, x = hidden_29_cast_fp16)[name = string("op_3831_cast_fp16")]; tensor var_3833_axes_0 = const()[name = string("op_3833_axes_0"), val = tensor([-1])]; tensor var_3833_cast_fp16 = squeeze(axes = var_3833_axes_0, x = var_3831_cast_fp16)[name = string("op_3833_cast_fp16")]; tensor hidden_states_187_axes_0 = const()[name = string("hidden_states_187_axes_0"), val = tensor([0])]; tensor hidden_states_187_cast_fp16 = expand_dims(axes = hidden_states_187_axes_0, x = var_3833_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; fp16 var_3839_promoted_to_fp16 = const()[name = string("op_3839_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3845_cast_fp16 = pow(x = hidden_states_187_cast_fp16, y = var_3839_promoted_to_fp16)[name = string("op_3845_cast_fp16")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_3845_cast_fp16)[name = string("variance_63_cast_fp16")]; fp16 var_3848_to_fp16 = const()[name = string("op_3848_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3849_cast_fp16 = add(x = variance_63_cast_fp16, y = var_3848_to_fp16)[name = string("op_3849_cast_fp16")]; fp32 var_3850_epsilon_0 = const()[name = string("op_3850_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3850_cast_fp16 = rsqrt(epsilon = var_3850_epsilon_0, x = var_3849_cast_fp16)[name = string("op_3850_cast_fp16")]; tensor hidden_states_191_cast_fp16 = mul(x = hidden_states_187_cast_fp16, y = var_3850_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; tensor const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116475392)))]; tensor input_75_cast_fp16 = mul(x = const_80_to_fp16, y = hidden_states_191_cast_fp16)[name = string("input_75_cast_fp16")]; tensor layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116477504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623296))))[name = string("layers_7_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_3860_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_3860_cast_fp16")]; tensor layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122769664))))[name = string("layers_7_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_up_proj_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_79_cast_fp16 = mul(x = var_3860_cast_fp16, y = linear_54_cast_fp16)[name = string("input_79_cast_fp16")]; tensor layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122770240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916032))))[name = string("layers_7_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor var_3867_axes_0 = const()[name = string("op_3867_axes_0"), val = tensor([0])]; tensor var_3867_cast_fp16 = squeeze(axes = var_3867_axes_0, x = linear_55_cast_fp16)[name = string("op_3867_cast_fp16")]; tensor var_3869_axes_0 = const()[name = string("op_3869_axes_0"), val = tensor([0])]; tensor var_3869_cast_fp16 = squeeze(axes = var_3869_axes_0, x = var_3867_cast_fp16)[name = string("op_3869_cast_fp16")]; tensor var_3871_axes_0 = const()[name = string("op_3871_axes_0"), val = tensor([-1])]; tensor var_3871_cast_fp16 = expand_dims(axes = var_3871_axes_0, x = var_3869_cast_fp16)[name = string("op_3871_cast_fp16")]; tensor mlp_4d_15_axes_0 = const()[name = string("mlp_4d_15_axes_0"), val = tensor([-1])]; tensor mlp_4d_15_cast_fp16 = expand_dims(axes = mlp_4d_15_axes_0, x = var_3871_cast_fp16)[name = string("mlp_4d_15_cast_fp16")]; tensor hidden_31_cast_fp16 = add(x = hidden_29_cast_fp16, y = mlp_4d_15_cast_fp16)[name = string("hidden_31_cast_fp16")]; tensor var_3885_begin_0 = const()[name = string("op_3885_begin_0"), val = tensor([0, 8192, 0, 0])]; tensor var_3885_end_0 = const()[name = string("op_3885_end_0"), val = tensor([1, 9216, 1, 256])]; tensor var_3885_end_mask_0 = const()[name = string("op_3885_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3885_cast_fp16 = slice_by_index(begin = var_3885_begin_0, end = var_3885_end_0, end_mask = var_3885_end_mask_0, x = key_cache)[name = string("op_3885_cast_fp16")]; tensor var_3905_begin_0 = const()[name = string("op_3905_begin_0"), val = tensor([0, 8192, 0, 0])]; tensor var_3905_end_0 = const()[name = string("op_3905_end_0"), val = tensor([1, 9216, 1, 256])]; tensor var_3905_end_mask_0 = const()[name = string("op_3905_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3905_cast_fp16 = slice_by_index(begin = var_3905_begin_0, end = var_3905_end_0, end_mask = var_3905_end_mask_0, x = value_cache)[name = string("op_3905_cast_fp16")]; tensor var_3917_axes_0 = const()[name = string("op_3917_axes_0"), val = tensor([-1])]; tensor var_3917_cast_fp16 = squeeze(axes = var_3917_axes_0, x = hidden_31_cast_fp16)[name = string("op_3917_cast_fp16")]; tensor var_3919_axes_0 = const()[name = string("op_3919_axes_0"), val = tensor([-1])]; tensor var_3919_cast_fp16 = squeeze(axes = var_3919_axes_0, x = var_3917_cast_fp16)[name = string("op_3919_cast_fp16")]; tensor hidden_states_193_axes_0 = const()[name = string("hidden_states_193_axes_0"), val = tensor([0])]; tensor hidden_states_193_cast_fp16 = expand_dims(axes = hidden_states_193_axes_0, x = var_3919_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; fp16 var_3925_promoted_to_fp16 = const()[name = string("op_3925_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3931_cast_fp16 = pow(x = hidden_states_193_cast_fp16, y = var_3925_promoted_to_fp16)[name = string("op_3931_cast_fp16")]; tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_3931_cast_fp16)[name = string("variance_65_cast_fp16")]; fp16 var_3934_to_fp16 = const()[name = string("op_3934_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3935_cast_fp16 = add(x = variance_65_cast_fp16, y = var_3934_to_fp16)[name = string("op_3935_cast_fp16")]; fp32 var_3936_epsilon_0 = const()[name = string("op_3936_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3936_cast_fp16 = rsqrt(epsilon = var_3936_epsilon_0, x = var_3935_cast_fp16)[name = string("op_3936_cast_fp16")]; tensor hidden_states_197_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = var_3936_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; tensor const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916608)))]; tensor input_81_cast_fp16 = mul(x = const_81_to_fp16, y = hidden_states_197_cast_fp16)[name = string("input_81_cast_fp16")]; tensor layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125918720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128015936))))[name = string("layers_8_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128016512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065152))))[name = string("layers_8_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114368))))[name = string("layers_8_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor var_3953 = const()[name = string("op_3953"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_199_cast_fp16 = reshape(shape = var_3953, x = linear_56_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; tensor var_3959 = const()[name = string("op_3959"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_205_cast_fp16 = reshape(shape = var_3959, x = linear_57_cast_fp16)[name = string("hidden_states_205_cast_fp16")]; tensor var_3965 = const()[name = string("op_3965"), val = tensor([1, 1, 8, 128])]; tensor v_51_cast_fp16 = reshape(shape = var_3965, x = linear_58_cast_fp16)[name = string("v_51_cast_fp16")]; fp16 var_3970_promoted_to_fp16 = const()[name = string("op_3970_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3976_cast_fp16 = pow(x = hidden_states_199_cast_fp16, y = var_3970_promoted_to_fp16)[name = string("op_3976_cast_fp16")]; tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_3976_cast_fp16)[name = string("variance_67_cast_fp16")]; fp16 var_3979_to_fp16 = const()[name = string("op_3979_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3980_cast_fp16 = add(x = variance_67_cast_fp16, y = var_3979_to_fp16)[name = string("op_3980_cast_fp16")]; fp32 var_3981_epsilon_0 = const()[name = string("op_3981_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3981_cast_fp16 = rsqrt(epsilon = var_3981_epsilon_0, x = var_3980_cast_fp16)[name = string("op_3981_cast_fp16")]; tensor hidden_states_203_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = var_3981_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; tensor const_82_to_fp16 = const()[name = string("const_82_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114944)))]; tensor q_67_cast_fp16 = mul(x = const_82_to_fp16, y = hidden_states_203_cast_fp16)[name = string("q_67_cast_fp16")]; fp16 var_3988_promoted_to_fp16 = const()[name = string("op_3988_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3994_cast_fp16 = pow(x = hidden_states_205_cast_fp16, y = var_3988_promoted_to_fp16)[name = string("op_3994_cast_fp16")]; tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_3994_cast_fp16)[name = string("variance_69_cast_fp16")]; fp16 var_3997_to_fp16 = const()[name = string("op_3997_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3998_cast_fp16 = add(x = variance_69_cast_fp16, y = var_3997_to_fp16)[name = string("op_3998_cast_fp16")]; fp32 var_3999_epsilon_0 = const()[name = string("op_3999_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3999_cast_fp16 = rsqrt(epsilon = var_3999_epsilon_0, x = var_3998_cast_fp16)[name = string("op_3999_cast_fp16")]; tensor hidden_states_209_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = var_3999_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; tensor const_83_to_fp16 = const()[name = string("const_83_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115264)))]; tensor k_67_cast_fp16 = mul(x = const_83_to_fp16, y = hidden_states_209_cast_fp16)[name = string("k_67_cast_fp16")]; tensor q_69_perm_0 = const()[name = string("q_69_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_69_perm_0 = const()[name = string("k_69_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_53_perm_0 = const()[name = string("v_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_69_cast_fp16 = transpose(perm = q_69_perm_0, x = q_67_cast_fp16)[name = string("transpose_79")]; tensor var_4016_cast_fp16 = mul(x = q_69_cast_fp16, y = cos_3_cast_fp16)[name = string("op_4016_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_69_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_69_cast_fp16)[name = string("x2_33_cast_fp16")]; fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4037_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_4037_cast_fp16")]; int32 var_4039 = const()[name = string("op_4039"), val = int32(-1)]; bool var_4040_interleave_0 = const()[name = string("op_4040_interleave_0"), val = bool(false)]; tensor var_4040_cast_fp16 = concat(axis = var_4039, interleave = var_4040_interleave_0, values = (var_4037_cast_fp16, x1_33_cast_fp16))[name = string("op_4040_cast_fp16")]; tensor var_4041_cast_fp16 = mul(x = var_4040_cast_fp16, y = sin_3_cast_fp16)[name = string("op_4041_cast_fp16")]; tensor q_71_cast_fp16 = add(x = var_4016_cast_fp16, y = var_4041_cast_fp16)[name = string("q_71_cast_fp16")]; tensor k_69_cast_fp16 = transpose(perm = k_69_perm_0, x = k_67_cast_fp16)[name = string("transpose_78")]; tensor var_4044_cast_fp16 = mul(x = k_69_cast_fp16, y = cos_3_cast_fp16)[name = string("op_4044_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_69_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_69_cast_fp16)[name = string("x2_35_cast_fp16")]; fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4065_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_4065_cast_fp16")]; int32 var_4067 = const()[name = string("op_4067"), val = int32(-1)]; bool var_4068_interleave_0 = const()[name = string("op_4068_interleave_0"), val = bool(false)]; tensor var_4068_cast_fp16 = concat(axis = var_4067, interleave = var_4068_interleave_0, values = (var_4065_cast_fp16, x1_35_cast_fp16))[name = string("op_4068_cast_fp16")]; tensor var_4069_cast_fp16 = mul(x = var_4068_cast_fp16, y = sin_3_cast_fp16)[name = string("op_4069_cast_fp16")]; tensor k_71_cast_fp16 = add(x = var_4044_cast_fp16, y = var_4069_cast_fp16)[name = string("k_71_cast_fp16")]; tensor var_4076 = const()[name = string("op_4076"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_17_cast_fp16 = reshape(shape = var_4076, x = k_71_cast_fp16)[name = string("nk_flat_17_cast_fp16")]; tensor var_4082 = const()[name = string("op_4082"), val = tensor([1, 1024, 1, 1])]; tensor v_53_cast_fp16 = transpose(perm = v_53_perm_0, x = v_51_cast_fp16)[name = string("transpose_77")]; tensor nv_flat_17_cast_fp16 = reshape(shape = var_4082, x = v_53_cast_fp16)[name = string("nv_flat_17_cast_fp16")]; tensor var_4091_cast_fp16 = mul(x = var_3885_cast_fp16, y = var_1194_cast_fp16)[name = string("op_4091_cast_fp16")]; tensor var_4092_cast_fp16 = mul(x = nk_flat_17_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_4092_cast_fp16")]; tensor key_cache_37_cast_fp16 = add(x = var_4091_cast_fp16, y = var_4092_cast_fp16)[name = string("key_cache_37_cast_fp16")]; tensor var_4098_cast_fp16 = mul(x = var_3905_cast_fp16, y = var_1194_cast_fp16)[name = string("op_4098_cast_fp16")]; tensor var_4099_cast_fp16 = mul(x = nv_flat_17_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_4099_cast_fp16")]; tensor value_cache_37_cast_fp16 = add(x = var_4098_cast_fp16, y = var_4099_cast_fp16)[name = string("value_cache_37_cast_fp16")]; tensor kc_49_axes_0 = const()[name = string("kc_49_axes_0"), val = tensor([2])]; tensor kc_49_cast_fp16 = squeeze(axes = kc_49_axes_0, x = key_cache_37_cast_fp16)[name = string("kc_49_cast_fp16")]; tensor var_4108 = const()[name = string("op_4108"), val = tensor([1, 8, 128, 256])]; tensor kc_51_cast_fp16 = reshape(shape = var_4108, x = kc_49_cast_fp16)[name = string("kc_51_cast_fp16")]; tensor vc_49_axes_0 = const()[name = string("vc_49_axes_0"), val = tensor([2])]; tensor vc_49_cast_fp16 = squeeze(axes = vc_49_axes_0, x = value_cache_37_cast_fp16)[name = string("vc_49_cast_fp16")]; tensor var_4116 = const()[name = string("op_4116"), val = tensor([1, 8, 128, 256])]; tensor vc_51_cast_fp16 = reshape(shape = var_4116, x = vc_49_cast_fp16)[name = string("vc_51_cast_fp16")]; tensor var_4119_axes_0 = const()[name = string("op_4119_axes_0"), val = tensor([2])]; tensor var_4119_cast_fp16 = expand_dims(axes = var_4119_axes_0, x = kc_51_cast_fp16)[name = string("op_4119_cast_fp16")]; tensor var_4127_reps_0 = const()[name = string("op_4127_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4127_cast_fp16 = tile(reps = var_4127_reps_0, x = var_4119_cast_fp16)[name = string("op_4127_cast_fp16")]; tensor var_4132 = const()[name = string("op_4132"), val = tensor([1, 16, 128, 256])]; tensor kc_53_cast_fp16 = reshape(shape = var_4132, x = var_4127_cast_fp16)[name = string("kc_53_cast_fp16")]; tensor var_4135_axes_0 = const()[name = string("op_4135_axes_0"), val = tensor([2])]; tensor var_4135_cast_fp16 = expand_dims(axes = var_4135_axes_0, x = vc_51_cast_fp16)[name = string("op_4135_cast_fp16")]; tensor var_4143_reps_0 = const()[name = string("op_4143_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4143_cast_fp16 = tile(reps = var_4143_reps_0, x = var_4135_cast_fp16)[name = string("op_4143_cast_fp16")]; tensor var_4148 = const()[name = string("op_4148"), val = tensor([1, 16, 128, 256])]; tensor vc_53_cast_fp16 = reshape(shape = var_4148, x = var_4143_cast_fp16)[name = string("vc_53_cast_fp16")]; bool var_4150_transpose_x_0 = const()[name = string("op_4150_transpose_x_0"), val = bool(false)]; bool var_4150_transpose_y_0 = const()[name = string("op_4150_transpose_y_0"), val = bool(false)]; tensor var_4150_cast_fp16 = matmul(transpose_x = var_4150_transpose_x_0, transpose_y = var_4150_transpose_y_0, x = q_71_cast_fp16, y = kc_53_cast_fp16)[name = string("op_4150_cast_fp16")]; fp16 _inversed_attn_weights_65_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_65_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_65_cast_fp16 = mul(x = var_4150_cast_fp16, y = _inversed_attn_weights_65_y_0_to_fp16)[name = string("_inversed_attn_weights_65_cast_fp16")]; tensor attn_weights_67_cast_fp16 = add(x = _inversed_attn_weights_65_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_67_cast_fp16")]; int32 var_4164 = const()[name = string("op_4164"), val = int32(-1)]; tensor attn_weights_71_cast_fp16 = softmax(axis = var_4164, x = attn_weights_67_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; bool attn_output_33_transpose_x_1 = const()[name = string("attn_output_33_transpose_x_1"), val = bool(false)]; bool attn_output_33_transpose_y_1 = const()[name = string("attn_output_33_transpose_y_1"), val = bool(true)]; tensor attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_1, transpose_y = attn_output_33_transpose_y_1, x = attn_weights_71_cast_fp16, y = vc_53_cast_fp16)[name = string("attn_output_33_cast_fp16")]; tensor var_4173_perm_0 = const()[name = string("op_4173_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4177 = const()[name = string("op_4177"), val = tensor([1, 1, -1])]; tensor var_4173_cast_fp16 = transpose(perm = var_4173_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_76")]; tensor input_83_cast_fp16 = reshape(shape = var_4177, x = var_4173_cast_fp16)[name = string("input_83_cast_fp16")]; tensor layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132212800))))[name = string("layers_8_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor var_4183_axes_0 = const()[name = string("op_4183_axes_0"), val = tensor([0])]; tensor var_4183_cast_fp16 = squeeze(axes = var_4183_axes_0, x = linear_59_cast_fp16)[name = string("op_4183_cast_fp16")]; tensor var_4185_axes_0 = const()[name = string("op_4185_axes_0"), val = tensor([0])]; tensor var_4185_cast_fp16 = squeeze(axes = var_4185_axes_0, x = var_4183_cast_fp16)[name = string("op_4185_cast_fp16")]; tensor var_4187_axes_0 = const()[name = string("op_4187_axes_0"), val = tensor([-1])]; tensor var_4187_cast_fp16 = expand_dims(axes = var_4187_axes_0, x = var_4185_cast_fp16)[name = string("op_4187_cast_fp16")]; tensor attn_4d_17_axes_0 = const()[name = string("attn_4d_17_axes_0"), val = tensor([-1])]; tensor attn_4d_17_cast_fp16 = expand_dims(axes = attn_4d_17_axes_0, x = var_4187_cast_fp16)[name = string("attn_4d_17_cast_fp16")]; tensor hidden_33_cast_fp16 = add(x = hidden_31_cast_fp16, y = attn_4d_17_cast_fp16)[name = string("hidden_33_cast_fp16")]; tensor var_4193_axes_0 = const()[name = string("op_4193_axes_0"), val = tensor([-1])]; tensor var_4193_cast_fp16 = squeeze(axes = var_4193_axes_0, x = hidden_33_cast_fp16)[name = string("op_4193_cast_fp16")]; tensor var_4195_axes_0 = const()[name = string("op_4195_axes_0"), val = tensor([-1])]; tensor var_4195_cast_fp16 = squeeze(axes = var_4195_axes_0, x = var_4193_cast_fp16)[name = string("op_4195_cast_fp16")]; tensor hidden_states_211_axes_0 = const()[name = string("hidden_states_211_axes_0"), val = tensor([0])]; tensor hidden_states_211_cast_fp16 = expand_dims(axes = hidden_states_211_axes_0, x = var_4195_cast_fp16)[name = string("hidden_states_211_cast_fp16")]; fp16 var_4201_promoted_to_fp16 = const()[name = string("op_4201_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4207_cast_fp16 = pow(x = hidden_states_211_cast_fp16, y = var_4201_promoted_to_fp16)[name = string("op_4207_cast_fp16")]; tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_4207_cast_fp16)[name = string("variance_71_cast_fp16")]; fp16 var_4210_to_fp16 = const()[name = string("op_4210_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4211_cast_fp16 = add(x = variance_71_cast_fp16, y = var_4210_to_fp16)[name = string("op_4211_cast_fp16")]; fp32 var_4212_epsilon_0 = const()[name = string("op_4212_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4212_cast_fp16 = rsqrt(epsilon = var_4212_epsilon_0, x = var_4211_cast_fp16)[name = string("op_4212_cast_fp16")]; tensor hidden_states_215_cast_fp16 = mul(x = hidden_states_211_cast_fp16, y = var_4212_cast_fp16)[name = string("hidden_states_215_cast_fp16")]; tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132213376)))]; tensor input_85_cast_fp16 = mul(x = const_90_to_fp16, y = hidden_states_215_cast_fp16)[name = string("input_85_cast_fp16")]; tensor layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132215488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361280))))[name = string("layers_8_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_4222_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_4222_cast_fp16")]; tensor layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138507648))))[name = string("layers_8_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_up_proj_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_89_cast_fp16 = mul(x = var_4222_cast_fp16, y = linear_61_cast_fp16)[name = string("input_89_cast_fp16")]; tensor layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138508224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654016))))[name = string("layers_8_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor var_4229_axes_0 = const()[name = string("op_4229_axes_0"), val = tensor([0])]; tensor var_4229_cast_fp16 = squeeze(axes = var_4229_axes_0, x = linear_62_cast_fp16)[name = string("op_4229_cast_fp16")]; tensor var_4231_axes_0 = const()[name = string("op_4231_axes_0"), val = tensor([0])]; tensor var_4231_cast_fp16 = squeeze(axes = var_4231_axes_0, x = var_4229_cast_fp16)[name = string("op_4231_cast_fp16")]; tensor var_4233_axes_0 = const()[name = string("op_4233_axes_0"), val = tensor([-1])]; tensor var_4233_cast_fp16 = expand_dims(axes = var_4233_axes_0, x = var_4231_cast_fp16)[name = string("op_4233_cast_fp16")]; tensor mlp_4d_17_axes_0 = const()[name = string("mlp_4d_17_axes_0"), val = tensor([-1])]; tensor mlp_4d_17_cast_fp16 = expand_dims(axes = mlp_4d_17_axes_0, x = var_4233_cast_fp16)[name = string("mlp_4d_17_cast_fp16")]; tensor hidden_35_cast_fp16 = add(x = hidden_33_cast_fp16, y = mlp_4d_17_cast_fp16)[name = string("hidden_35_cast_fp16")]; tensor var_4247_begin_0 = const()[name = string("op_4247_begin_0"), val = tensor([0, 9216, 0, 0])]; tensor var_4247_end_0 = const()[name = string("op_4247_end_0"), val = tensor([1, 10240, 1, 256])]; tensor var_4247_end_mask_0 = const()[name = string("op_4247_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4247_cast_fp16 = slice_by_index(begin = var_4247_begin_0, end = var_4247_end_0, end_mask = var_4247_end_mask_0, x = key_cache)[name = string("op_4247_cast_fp16")]; tensor var_4267_begin_0 = const()[name = string("op_4267_begin_0"), val = tensor([0, 9216, 0, 0])]; tensor var_4267_end_0 = const()[name = string("op_4267_end_0"), val = tensor([1, 10240, 1, 256])]; tensor var_4267_end_mask_0 = const()[name = string("op_4267_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4267_cast_fp16 = slice_by_index(begin = var_4267_begin_0, end = var_4267_end_0, end_mask = var_4267_end_mask_0, x = value_cache)[name = string("op_4267_cast_fp16")]; tensor var_4279_axes_0 = const()[name = string("op_4279_axes_0"), val = tensor([-1])]; tensor var_4279_cast_fp16 = squeeze(axes = var_4279_axes_0, x = hidden_35_cast_fp16)[name = string("op_4279_cast_fp16")]; tensor var_4281_axes_0 = const()[name = string("op_4281_axes_0"), val = tensor([-1])]; tensor var_4281_cast_fp16 = squeeze(axes = var_4281_axes_0, x = var_4279_cast_fp16)[name = string("op_4281_cast_fp16")]; tensor hidden_states_217_axes_0 = const()[name = string("hidden_states_217_axes_0"), val = tensor([0])]; tensor hidden_states_217_cast_fp16 = expand_dims(axes = hidden_states_217_axes_0, x = var_4281_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; fp16 var_4287_promoted_to_fp16 = const()[name = string("op_4287_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4293_cast_fp16 = pow(x = hidden_states_217_cast_fp16, y = var_4287_promoted_to_fp16)[name = string("op_4293_cast_fp16")]; tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_4293_cast_fp16)[name = string("variance_73_cast_fp16")]; fp16 var_4296_to_fp16 = const()[name = string("op_4296_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4297_cast_fp16 = add(x = variance_73_cast_fp16, y = var_4296_to_fp16)[name = string("op_4297_cast_fp16")]; fp32 var_4298_epsilon_0 = const()[name = string("op_4298_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4298_cast_fp16 = rsqrt(epsilon = var_4298_epsilon_0, x = var_4297_cast_fp16)[name = string("op_4298_cast_fp16")]; tensor hidden_states_221_cast_fp16 = mul(x = hidden_states_217_cast_fp16, y = var_4298_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654592)))]; tensor input_91_cast_fp16 = mul(x = const_91_to_fp16, y = hidden_states_221_cast_fp16)[name = string("input_91_cast_fp16")]; tensor layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141656704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143753920))))[name = string("layers_9_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143754496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803136))))[name = string("layers_9_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852352))))[name = string("layers_9_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor var_4315 = const()[name = string("op_4315"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_223_cast_fp16 = reshape(shape = var_4315, x = linear_63_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; tensor var_4321 = const()[name = string("op_4321"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_229_cast_fp16 = reshape(shape = var_4321, x = linear_64_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; tensor var_4327 = const()[name = string("op_4327"), val = tensor([1, 1, 8, 128])]; tensor v_57_cast_fp16 = reshape(shape = var_4327, x = linear_65_cast_fp16)[name = string("v_57_cast_fp16")]; fp16 var_4332_promoted_to_fp16 = const()[name = string("op_4332_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4338_cast_fp16 = pow(x = hidden_states_223_cast_fp16, y = var_4332_promoted_to_fp16)[name = string("op_4338_cast_fp16")]; tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_4338_cast_fp16)[name = string("variance_75_cast_fp16")]; fp16 var_4341_to_fp16 = const()[name = string("op_4341_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4342_cast_fp16 = add(x = variance_75_cast_fp16, y = var_4341_to_fp16)[name = string("op_4342_cast_fp16")]; fp32 var_4343_epsilon_0 = const()[name = string("op_4343_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4343_cast_fp16 = rsqrt(epsilon = var_4343_epsilon_0, x = var_4342_cast_fp16)[name = string("op_4343_cast_fp16")]; tensor hidden_states_227_cast_fp16 = mul(x = hidden_states_223_cast_fp16, y = var_4343_cast_fp16)[name = string("hidden_states_227_cast_fp16")]; tensor const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852928)))]; tensor q_75_cast_fp16 = mul(x = const_92_to_fp16, y = hidden_states_227_cast_fp16)[name = string("q_75_cast_fp16")]; fp16 var_4350_promoted_to_fp16 = const()[name = string("op_4350_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4356_cast_fp16 = pow(x = hidden_states_229_cast_fp16, y = var_4350_promoted_to_fp16)[name = string("op_4356_cast_fp16")]; tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_4356_cast_fp16)[name = string("variance_77_cast_fp16")]; fp16 var_4359_to_fp16 = const()[name = string("op_4359_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4360_cast_fp16 = add(x = variance_77_cast_fp16, y = var_4359_to_fp16)[name = string("op_4360_cast_fp16")]; fp32 var_4361_epsilon_0 = const()[name = string("op_4361_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4361_cast_fp16 = rsqrt(epsilon = var_4361_epsilon_0, x = var_4360_cast_fp16)[name = string("op_4361_cast_fp16")]; tensor hidden_states_233_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = var_4361_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; tensor const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853248)))]; tensor k_75_cast_fp16 = mul(x = const_93_to_fp16, y = hidden_states_233_cast_fp16)[name = string("k_75_cast_fp16")]; tensor q_77_perm_0 = const()[name = string("q_77_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_77_perm_0 = const()[name = string("k_77_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_59_perm_0 = const()[name = string("v_59_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_77_cast_fp16 = transpose(perm = q_77_perm_0, x = q_75_cast_fp16)[name = string("transpose_75")]; tensor var_4378_cast_fp16 = mul(x = q_77_cast_fp16, y = cos_3_cast_fp16)[name = string("op_4378_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_77_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_77_cast_fp16)[name = string("x2_37_cast_fp16")]; fp16 const_96_promoted_to_fp16 = const()[name = string("const_96_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4399_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_96_promoted_to_fp16)[name = string("op_4399_cast_fp16")]; int32 var_4401 = const()[name = string("op_4401"), val = int32(-1)]; bool var_4402_interleave_0 = const()[name = string("op_4402_interleave_0"), val = bool(false)]; tensor var_4402_cast_fp16 = concat(axis = var_4401, interleave = var_4402_interleave_0, values = (var_4399_cast_fp16, x1_37_cast_fp16))[name = string("op_4402_cast_fp16")]; tensor var_4403_cast_fp16 = mul(x = var_4402_cast_fp16, y = sin_3_cast_fp16)[name = string("op_4403_cast_fp16")]; tensor q_79_cast_fp16 = add(x = var_4378_cast_fp16, y = var_4403_cast_fp16)[name = string("q_79_cast_fp16")]; tensor k_77_cast_fp16 = transpose(perm = k_77_perm_0, x = k_75_cast_fp16)[name = string("transpose_74")]; tensor var_4406_cast_fp16 = mul(x = k_77_cast_fp16, y = cos_3_cast_fp16)[name = string("op_4406_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_77_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_77_cast_fp16)[name = string("x2_39_cast_fp16")]; fp16 const_99_promoted_to_fp16 = const()[name = string("const_99_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4427_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_99_promoted_to_fp16)[name = string("op_4427_cast_fp16")]; int32 var_4429 = const()[name = string("op_4429"), val = int32(-1)]; bool var_4430_interleave_0 = const()[name = string("op_4430_interleave_0"), val = bool(false)]; tensor var_4430_cast_fp16 = concat(axis = var_4429, interleave = var_4430_interleave_0, values = (var_4427_cast_fp16, x1_39_cast_fp16))[name = string("op_4430_cast_fp16")]; tensor var_4431_cast_fp16 = mul(x = var_4430_cast_fp16, y = sin_3_cast_fp16)[name = string("op_4431_cast_fp16")]; tensor k_79_cast_fp16 = add(x = var_4406_cast_fp16, y = var_4431_cast_fp16)[name = string("k_79_cast_fp16")]; tensor var_4438 = const()[name = string("op_4438"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_19_cast_fp16 = reshape(shape = var_4438, x = k_79_cast_fp16)[name = string("nk_flat_19_cast_fp16")]; tensor var_4444 = const()[name = string("op_4444"), val = tensor([1, 1024, 1, 1])]; tensor v_59_cast_fp16 = transpose(perm = v_59_perm_0, x = v_57_cast_fp16)[name = string("transpose_73")]; tensor nv_flat_19_cast_fp16 = reshape(shape = var_4444, x = v_59_cast_fp16)[name = string("nv_flat_19_cast_fp16")]; tensor var_4453_cast_fp16 = mul(x = var_4247_cast_fp16, y = var_1194_cast_fp16)[name = string("op_4453_cast_fp16")]; tensor var_4454_cast_fp16 = mul(x = nk_flat_19_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_4454_cast_fp16")]; tensor key_cache_41_cast_fp16 = add(x = var_4453_cast_fp16, y = var_4454_cast_fp16)[name = string("key_cache_41_cast_fp16")]; tensor var_4460_cast_fp16 = mul(x = var_4267_cast_fp16, y = var_1194_cast_fp16)[name = string("op_4460_cast_fp16")]; tensor var_4461_cast_fp16 = mul(x = nv_flat_19_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_4461_cast_fp16")]; tensor value_cache_41_cast_fp16 = add(x = var_4460_cast_fp16, y = var_4461_cast_fp16)[name = string("value_cache_41_cast_fp16")]; tensor kc_55_axes_0 = const()[name = string("kc_55_axes_0"), val = tensor([2])]; tensor kc_55_cast_fp16 = squeeze(axes = kc_55_axes_0, x = key_cache_41_cast_fp16)[name = string("kc_55_cast_fp16")]; tensor var_4470 = const()[name = string("op_4470"), val = tensor([1, 8, 128, 256])]; tensor kc_57_cast_fp16 = reshape(shape = var_4470, x = kc_55_cast_fp16)[name = string("kc_57_cast_fp16")]; tensor vc_55_axes_0 = const()[name = string("vc_55_axes_0"), val = tensor([2])]; tensor vc_55_cast_fp16 = squeeze(axes = vc_55_axes_0, x = value_cache_41_cast_fp16)[name = string("vc_55_cast_fp16")]; tensor var_4478 = const()[name = string("op_4478"), val = tensor([1, 8, 128, 256])]; tensor vc_57_cast_fp16 = reshape(shape = var_4478, x = vc_55_cast_fp16)[name = string("vc_57_cast_fp16")]; tensor var_4481_axes_0 = const()[name = string("op_4481_axes_0"), val = tensor([2])]; tensor var_4481_cast_fp16 = expand_dims(axes = var_4481_axes_0, x = kc_57_cast_fp16)[name = string("op_4481_cast_fp16")]; tensor var_4489_reps_0 = const()[name = string("op_4489_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4489_cast_fp16 = tile(reps = var_4489_reps_0, x = var_4481_cast_fp16)[name = string("op_4489_cast_fp16")]; tensor var_4494 = const()[name = string("op_4494"), val = tensor([1, 16, 128, 256])]; tensor kc_59_cast_fp16 = reshape(shape = var_4494, x = var_4489_cast_fp16)[name = string("kc_59_cast_fp16")]; tensor var_4497_axes_0 = const()[name = string("op_4497_axes_0"), val = tensor([2])]; tensor var_4497_cast_fp16 = expand_dims(axes = var_4497_axes_0, x = vc_57_cast_fp16)[name = string("op_4497_cast_fp16")]; tensor var_4505_reps_0 = const()[name = string("op_4505_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4505_cast_fp16 = tile(reps = var_4505_reps_0, x = var_4497_cast_fp16)[name = string("op_4505_cast_fp16")]; tensor var_4510 = const()[name = string("op_4510"), val = tensor([1, 16, 128, 256])]; tensor vc_59_cast_fp16 = reshape(shape = var_4510, x = var_4505_cast_fp16)[name = string("vc_59_cast_fp16")]; bool var_4512_transpose_x_0 = const()[name = string("op_4512_transpose_x_0"), val = bool(false)]; bool var_4512_transpose_y_0 = const()[name = string("op_4512_transpose_y_0"), val = bool(false)]; tensor var_4512_cast_fp16 = matmul(transpose_x = var_4512_transpose_x_0, transpose_y = var_4512_transpose_y_0, x = q_79_cast_fp16, y = kc_59_cast_fp16)[name = string("op_4512_cast_fp16")]; fp16 _inversed_attn_weights_73_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_73_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_73_cast_fp16 = mul(x = var_4512_cast_fp16, y = _inversed_attn_weights_73_y_0_to_fp16)[name = string("_inversed_attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = _inversed_attn_weights_73_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_75_cast_fp16")]; int32 var_4526 = const()[name = string("op_4526"), val = int32(-1)]; tensor attn_weights_79_cast_fp16 = softmax(axis = var_4526, x = attn_weights_75_cast_fp16)[name = string("attn_weights_79_cast_fp16")]; bool attn_output_37_transpose_x_1 = const()[name = string("attn_output_37_transpose_x_1"), val = bool(false)]; bool attn_output_37_transpose_y_1 = const()[name = string("attn_output_37_transpose_y_1"), val = bool(true)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_1, transpose_y = attn_output_37_transpose_y_1, x = attn_weights_79_cast_fp16, y = vc_59_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_4535_perm_0 = const()[name = string("op_4535_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4539 = const()[name = string("op_4539"), val = tensor([1, 1, -1])]; tensor var_4535_cast_fp16 = transpose(perm = var_4535_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_72")]; tensor input_93_cast_fp16 = reshape(shape = var_4539, x = var_4535_cast_fp16)[name = string("input_93_cast_fp16")]; tensor layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147950784))))[name = string("layers_9_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor var_4545_axes_0 = const()[name = string("op_4545_axes_0"), val = tensor([0])]; tensor var_4545_cast_fp16 = squeeze(axes = var_4545_axes_0, x = linear_66_cast_fp16)[name = string("op_4545_cast_fp16")]; tensor var_4547_axes_0 = const()[name = string("op_4547_axes_0"), val = tensor([0])]; tensor var_4547_cast_fp16 = squeeze(axes = var_4547_axes_0, x = var_4545_cast_fp16)[name = string("op_4547_cast_fp16")]; tensor var_4549_axes_0 = const()[name = string("op_4549_axes_0"), val = tensor([-1])]; tensor var_4549_cast_fp16 = expand_dims(axes = var_4549_axes_0, x = var_4547_cast_fp16)[name = string("op_4549_cast_fp16")]; tensor attn_4d_19_axes_0 = const()[name = string("attn_4d_19_axes_0"), val = tensor([-1])]; tensor attn_4d_19_cast_fp16 = expand_dims(axes = attn_4d_19_axes_0, x = var_4549_cast_fp16)[name = string("attn_4d_19_cast_fp16")]; tensor hidden_37_cast_fp16 = add(x = hidden_35_cast_fp16, y = attn_4d_19_cast_fp16)[name = string("hidden_37_cast_fp16")]; tensor var_4555_axes_0 = const()[name = string("op_4555_axes_0"), val = tensor([-1])]; tensor var_4555_cast_fp16 = squeeze(axes = var_4555_axes_0, x = hidden_37_cast_fp16)[name = string("op_4555_cast_fp16")]; tensor var_4557_axes_0 = const()[name = string("op_4557_axes_0"), val = tensor([-1])]; tensor var_4557_cast_fp16 = squeeze(axes = var_4557_axes_0, x = var_4555_cast_fp16)[name = string("op_4557_cast_fp16")]; tensor hidden_states_235_axes_0 = const()[name = string("hidden_states_235_axes_0"), val = tensor([0])]; tensor hidden_states_235_cast_fp16 = expand_dims(axes = hidden_states_235_axes_0, x = var_4557_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; fp16 var_4563_promoted_to_fp16 = const()[name = string("op_4563_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4569_cast_fp16 = pow(x = hidden_states_235_cast_fp16, y = var_4563_promoted_to_fp16)[name = string("op_4569_cast_fp16")]; tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_4569_cast_fp16)[name = string("variance_79_cast_fp16")]; fp16 var_4572_to_fp16 = const()[name = string("op_4572_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4573_cast_fp16 = add(x = variance_79_cast_fp16, y = var_4572_to_fp16)[name = string("op_4573_cast_fp16")]; fp32 var_4574_epsilon_0 = const()[name = string("op_4574_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4574_cast_fp16 = rsqrt(epsilon = var_4574_epsilon_0, x = var_4573_cast_fp16)[name = string("op_4574_cast_fp16")]; tensor hidden_states_239_cast_fp16 = mul(x = hidden_states_235_cast_fp16, y = var_4574_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147951360)))]; tensor input_95_cast_fp16 = mul(x = const_100_to_fp16, y = hidden_states_239_cast_fp16)[name = string("input_95_cast_fp16")]; tensor layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147953472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099264))))[name = string("layers_9_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_4584_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_4584_cast_fp16")]; tensor layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154245632))))[name = string("layers_9_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_up_proj_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_99_cast_fp16 = mul(x = var_4584_cast_fp16, y = linear_68_cast_fp16)[name = string("input_99_cast_fp16")]; tensor layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154246208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392000))))[name = string("layers_9_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor var_4591_axes_0 = const()[name = string("op_4591_axes_0"), val = tensor([0])]; tensor var_4591_cast_fp16 = squeeze(axes = var_4591_axes_0, x = linear_69_cast_fp16)[name = string("op_4591_cast_fp16")]; tensor var_4593_axes_0 = const()[name = string("op_4593_axes_0"), val = tensor([0])]; tensor var_4593_cast_fp16 = squeeze(axes = var_4593_axes_0, x = var_4591_cast_fp16)[name = string("op_4593_cast_fp16")]; tensor var_4595_axes_0 = const()[name = string("op_4595_axes_0"), val = tensor([-1])]; tensor var_4595_cast_fp16 = expand_dims(axes = var_4595_axes_0, x = var_4593_cast_fp16)[name = string("op_4595_cast_fp16")]; tensor mlp_4d_19_axes_0 = const()[name = string("mlp_4d_19_axes_0"), val = tensor([-1])]; tensor mlp_4d_19_cast_fp16 = expand_dims(axes = mlp_4d_19_axes_0, x = var_4595_cast_fp16)[name = string("mlp_4d_19_cast_fp16")]; tensor hidden_39_cast_fp16 = add(x = hidden_37_cast_fp16, y = mlp_4d_19_cast_fp16)[name = string("hidden_39_cast_fp16")]; tensor var_4609_begin_0 = const()[name = string("op_4609_begin_0"), val = tensor([0, 10240, 0, 0])]; tensor var_4609_end_0 = const()[name = string("op_4609_end_0"), val = tensor([1, 11264, 1, 256])]; tensor var_4609_end_mask_0 = const()[name = string("op_4609_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4609_cast_fp16 = slice_by_index(begin = var_4609_begin_0, end = var_4609_end_0, end_mask = var_4609_end_mask_0, x = key_cache)[name = string("op_4609_cast_fp16")]; tensor var_4629_begin_0 = const()[name = string("op_4629_begin_0"), val = tensor([0, 10240, 0, 0])]; tensor var_4629_end_0 = const()[name = string("op_4629_end_0"), val = tensor([1, 11264, 1, 256])]; tensor var_4629_end_mask_0 = const()[name = string("op_4629_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4629_cast_fp16 = slice_by_index(begin = var_4629_begin_0, end = var_4629_end_0, end_mask = var_4629_end_mask_0, x = value_cache)[name = string("op_4629_cast_fp16")]; tensor var_4641_axes_0 = const()[name = string("op_4641_axes_0"), val = tensor([-1])]; tensor var_4641_cast_fp16 = squeeze(axes = var_4641_axes_0, x = hidden_39_cast_fp16)[name = string("op_4641_cast_fp16")]; tensor var_4643_axes_0 = const()[name = string("op_4643_axes_0"), val = tensor([-1])]; tensor var_4643_cast_fp16 = squeeze(axes = var_4643_axes_0, x = var_4641_cast_fp16)[name = string("op_4643_cast_fp16")]; tensor hidden_states_241_axes_0 = const()[name = string("hidden_states_241_axes_0"), val = tensor([0])]; tensor hidden_states_241_cast_fp16 = expand_dims(axes = hidden_states_241_axes_0, x = var_4643_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; fp16 var_4649_promoted_to_fp16 = const()[name = string("op_4649_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4655_cast_fp16 = pow(x = hidden_states_241_cast_fp16, y = var_4649_promoted_to_fp16)[name = string("op_4655_cast_fp16")]; tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_4655_cast_fp16)[name = string("variance_81_cast_fp16")]; fp16 var_4658_to_fp16 = const()[name = string("op_4658_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4659_cast_fp16 = add(x = variance_81_cast_fp16, y = var_4658_to_fp16)[name = string("op_4659_cast_fp16")]; fp32 var_4660_epsilon_0 = const()[name = string("op_4660_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4660_cast_fp16 = rsqrt(epsilon = var_4660_epsilon_0, x = var_4659_cast_fp16)[name = string("op_4660_cast_fp16")]; tensor hidden_states_245_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = var_4660_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392576)))]; tensor input_101_cast_fp16 = mul(x = const_101_to_fp16, y = hidden_states_245_cast_fp16)[name = string("input_101_cast_fp16")]; tensor layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157394688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159491904))))[name = string("layers_10_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159492480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541120))))[name = string("layers_10_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590336))))[name = string("layers_10_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor var_4677 = const()[name = string("op_4677"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_247_cast_fp16 = reshape(shape = var_4677, x = linear_70_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; tensor var_4683 = const()[name = string("op_4683"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_253_cast_fp16 = reshape(shape = var_4683, x = linear_71_cast_fp16)[name = string("hidden_states_253_cast_fp16")]; tensor var_4689 = const()[name = string("op_4689"), val = tensor([1, 1, 8, 128])]; tensor v_63_cast_fp16 = reshape(shape = var_4689, x = linear_72_cast_fp16)[name = string("v_63_cast_fp16")]; fp16 var_4694_promoted_to_fp16 = const()[name = string("op_4694_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4700_cast_fp16 = pow(x = hidden_states_247_cast_fp16, y = var_4694_promoted_to_fp16)[name = string("op_4700_cast_fp16")]; tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_4700_cast_fp16)[name = string("variance_83_cast_fp16")]; fp16 var_4703_to_fp16 = const()[name = string("op_4703_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4704_cast_fp16 = add(x = variance_83_cast_fp16, y = var_4703_to_fp16)[name = string("op_4704_cast_fp16")]; fp32 var_4705_epsilon_0 = const()[name = string("op_4705_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4705_cast_fp16 = rsqrt(epsilon = var_4705_epsilon_0, x = var_4704_cast_fp16)[name = string("op_4705_cast_fp16")]; tensor hidden_states_251_cast_fp16 = mul(x = hidden_states_247_cast_fp16, y = var_4705_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; tensor const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590912)))]; tensor q_83_cast_fp16 = mul(x = const_102_to_fp16, y = hidden_states_251_cast_fp16)[name = string("q_83_cast_fp16")]; fp16 var_4712_promoted_to_fp16 = const()[name = string("op_4712_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4718_cast_fp16 = pow(x = hidden_states_253_cast_fp16, y = var_4712_promoted_to_fp16)[name = string("op_4718_cast_fp16")]; tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_4718_cast_fp16)[name = string("variance_85_cast_fp16")]; fp16 var_4721_to_fp16 = const()[name = string("op_4721_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4722_cast_fp16 = add(x = variance_85_cast_fp16, y = var_4721_to_fp16)[name = string("op_4722_cast_fp16")]; fp32 var_4723_epsilon_0 = const()[name = string("op_4723_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4723_cast_fp16 = rsqrt(epsilon = var_4723_epsilon_0, x = var_4722_cast_fp16)[name = string("op_4723_cast_fp16")]; tensor hidden_states_257_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = var_4723_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; tensor const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591232)))]; tensor k_83_cast_fp16 = mul(x = const_103_to_fp16, y = hidden_states_257_cast_fp16)[name = string("k_83_cast_fp16")]; tensor q_85_perm_0 = const()[name = string("q_85_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_85_perm_0 = const()[name = string("k_85_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_65_perm_0 = const()[name = string("v_65_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_85_cast_fp16 = transpose(perm = q_85_perm_0, x = q_83_cast_fp16)[name = string("transpose_71")]; tensor var_4740_cast_fp16 = mul(x = q_85_cast_fp16, y = cos_3_cast_fp16)[name = string("op_4740_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_85_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_85_cast_fp16)[name = string("x2_41_cast_fp16")]; fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4761_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_4761_cast_fp16")]; int32 var_4763 = const()[name = string("op_4763"), val = int32(-1)]; bool var_4764_interleave_0 = const()[name = string("op_4764_interleave_0"), val = bool(false)]; tensor var_4764_cast_fp16 = concat(axis = var_4763, interleave = var_4764_interleave_0, values = (var_4761_cast_fp16, x1_41_cast_fp16))[name = string("op_4764_cast_fp16")]; tensor var_4765_cast_fp16 = mul(x = var_4764_cast_fp16, y = sin_3_cast_fp16)[name = string("op_4765_cast_fp16")]; tensor q_87_cast_fp16 = add(x = var_4740_cast_fp16, y = var_4765_cast_fp16)[name = string("q_87_cast_fp16")]; tensor k_85_cast_fp16 = transpose(perm = k_85_perm_0, x = k_83_cast_fp16)[name = string("transpose_70")]; tensor var_4768_cast_fp16 = mul(x = k_85_cast_fp16, y = cos_3_cast_fp16)[name = string("op_4768_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_85_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_85_cast_fp16)[name = string("x2_43_cast_fp16")]; fp16 const_109_promoted_to_fp16 = const()[name = string("const_109_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4789_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_109_promoted_to_fp16)[name = string("op_4789_cast_fp16")]; int32 var_4791 = const()[name = string("op_4791"), val = int32(-1)]; bool var_4792_interleave_0 = const()[name = string("op_4792_interleave_0"), val = bool(false)]; tensor var_4792_cast_fp16 = concat(axis = var_4791, interleave = var_4792_interleave_0, values = (var_4789_cast_fp16, x1_43_cast_fp16))[name = string("op_4792_cast_fp16")]; tensor var_4793_cast_fp16 = mul(x = var_4792_cast_fp16, y = sin_3_cast_fp16)[name = string("op_4793_cast_fp16")]; tensor k_87_cast_fp16 = add(x = var_4768_cast_fp16, y = var_4793_cast_fp16)[name = string("k_87_cast_fp16")]; tensor var_4800 = const()[name = string("op_4800"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_21_cast_fp16 = reshape(shape = var_4800, x = k_87_cast_fp16)[name = string("nk_flat_21_cast_fp16")]; tensor var_4806 = const()[name = string("op_4806"), val = tensor([1, 1024, 1, 1])]; tensor v_65_cast_fp16 = transpose(perm = v_65_perm_0, x = v_63_cast_fp16)[name = string("transpose_69")]; tensor nv_flat_21_cast_fp16 = reshape(shape = var_4806, x = v_65_cast_fp16)[name = string("nv_flat_21_cast_fp16")]; tensor var_4815_cast_fp16 = mul(x = var_4609_cast_fp16, y = var_1194_cast_fp16)[name = string("op_4815_cast_fp16")]; tensor var_4816_cast_fp16 = mul(x = nk_flat_21_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_4816_cast_fp16")]; tensor key_cache_45_cast_fp16 = add(x = var_4815_cast_fp16, y = var_4816_cast_fp16)[name = string("key_cache_45_cast_fp16")]; tensor var_4822_cast_fp16 = mul(x = var_4629_cast_fp16, y = var_1194_cast_fp16)[name = string("op_4822_cast_fp16")]; tensor var_4823_cast_fp16 = mul(x = nv_flat_21_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_4823_cast_fp16")]; tensor value_cache_45_cast_fp16 = add(x = var_4822_cast_fp16, y = var_4823_cast_fp16)[name = string("value_cache_45_cast_fp16")]; tensor kc_61_axes_0 = const()[name = string("kc_61_axes_0"), val = tensor([2])]; tensor kc_61_cast_fp16 = squeeze(axes = kc_61_axes_0, x = key_cache_45_cast_fp16)[name = string("kc_61_cast_fp16")]; tensor var_4832 = const()[name = string("op_4832"), val = tensor([1, 8, 128, 256])]; tensor kc_63_cast_fp16 = reshape(shape = var_4832, x = kc_61_cast_fp16)[name = string("kc_63_cast_fp16")]; tensor vc_61_axes_0 = const()[name = string("vc_61_axes_0"), val = tensor([2])]; tensor vc_61_cast_fp16 = squeeze(axes = vc_61_axes_0, x = value_cache_45_cast_fp16)[name = string("vc_61_cast_fp16")]; tensor var_4840 = const()[name = string("op_4840"), val = tensor([1, 8, 128, 256])]; tensor vc_63_cast_fp16 = reshape(shape = var_4840, x = vc_61_cast_fp16)[name = string("vc_63_cast_fp16")]; tensor var_4843_axes_0 = const()[name = string("op_4843_axes_0"), val = tensor([2])]; tensor var_4843_cast_fp16 = expand_dims(axes = var_4843_axes_0, x = kc_63_cast_fp16)[name = string("op_4843_cast_fp16")]; tensor var_4851_reps_0 = const()[name = string("op_4851_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4851_cast_fp16 = tile(reps = var_4851_reps_0, x = var_4843_cast_fp16)[name = string("op_4851_cast_fp16")]; tensor var_4856 = const()[name = string("op_4856"), val = tensor([1, 16, 128, 256])]; tensor kc_65_cast_fp16 = reshape(shape = var_4856, x = var_4851_cast_fp16)[name = string("kc_65_cast_fp16")]; tensor var_4859_axes_0 = const()[name = string("op_4859_axes_0"), val = tensor([2])]; tensor var_4859_cast_fp16 = expand_dims(axes = var_4859_axes_0, x = vc_63_cast_fp16)[name = string("op_4859_cast_fp16")]; tensor var_4867_reps_0 = const()[name = string("op_4867_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4867_cast_fp16 = tile(reps = var_4867_reps_0, x = var_4859_cast_fp16)[name = string("op_4867_cast_fp16")]; tensor var_4872 = const()[name = string("op_4872"), val = tensor([1, 16, 128, 256])]; tensor vc_65_cast_fp16 = reshape(shape = var_4872, x = var_4867_cast_fp16)[name = string("vc_65_cast_fp16")]; bool var_4874_transpose_x_0 = const()[name = string("op_4874_transpose_x_0"), val = bool(false)]; bool var_4874_transpose_y_0 = const()[name = string("op_4874_transpose_y_0"), val = bool(false)]; tensor var_4874_cast_fp16 = matmul(transpose_x = var_4874_transpose_x_0, transpose_y = var_4874_transpose_y_0, x = q_87_cast_fp16, y = kc_65_cast_fp16)[name = string("op_4874_cast_fp16")]; fp16 _inversed_attn_weights_81_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_81_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_81_cast_fp16 = mul(x = var_4874_cast_fp16, y = _inversed_attn_weights_81_y_0_to_fp16)[name = string("_inversed_attn_weights_81_cast_fp16")]; tensor attn_weights_83_cast_fp16 = add(x = _inversed_attn_weights_81_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; int32 var_4888 = const()[name = string("op_4888"), val = int32(-1)]; tensor attn_weights_87_cast_fp16 = softmax(axis = var_4888, x = attn_weights_83_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; bool attn_output_41_transpose_x_1 = const()[name = string("attn_output_41_transpose_x_1"), val = bool(false)]; bool attn_output_41_transpose_y_1 = const()[name = string("attn_output_41_transpose_y_1"), val = bool(true)]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_1, transpose_y = attn_output_41_transpose_y_1, x = attn_weights_87_cast_fp16, y = vc_65_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_4897_perm_0 = const()[name = string("op_4897_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4901 = const()[name = string("op_4901"), val = tensor([1, 1, -1])]; tensor var_4897_cast_fp16 = transpose(perm = var_4897_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_68")]; tensor input_103_cast_fp16 = reshape(shape = var_4901, x = var_4897_cast_fp16)[name = string("input_103_cast_fp16")]; tensor layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163688768))))[name = string("layers_10_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor var_4907_axes_0 = const()[name = string("op_4907_axes_0"), val = tensor([0])]; tensor var_4907_cast_fp16 = squeeze(axes = var_4907_axes_0, x = linear_73_cast_fp16)[name = string("op_4907_cast_fp16")]; tensor var_4909_axes_0 = const()[name = string("op_4909_axes_0"), val = tensor([0])]; tensor var_4909_cast_fp16 = squeeze(axes = var_4909_axes_0, x = var_4907_cast_fp16)[name = string("op_4909_cast_fp16")]; tensor var_4911_axes_0 = const()[name = string("op_4911_axes_0"), val = tensor([-1])]; tensor var_4911_cast_fp16 = expand_dims(axes = var_4911_axes_0, x = var_4909_cast_fp16)[name = string("op_4911_cast_fp16")]; tensor attn_4d_21_axes_0 = const()[name = string("attn_4d_21_axes_0"), val = tensor([-1])]; tensor attn_4d_21_cast_fp16 = expand_dims(axes = attn_4d_21_axes_0, x = var_4911_cast_fp16)[name = string("attn_4d_21_cast_fp16")]; tensor hidden_41_cast_fp16 = add(x = hidden_39_cast_fp16, y = attn_4d_21_cast_fp16)[name = string("hidden_41_cast_fp16")]; tensor var_4917_axes_0 = const()[name = string("op_4917_axes_0"), val = tensor([-1])]; tensor var_4917_cast_fp16 = squeeze(axes = var_4917_axes_0, x = hidden_41_cast_fp16)[name = string("op_4917_cast_fp16")]; tensor var_4919_axes_0 = const()[name = string("op_4919_axes_0"), val = tensor([-1])]; tensor var_4919_cast_fp16 = squeeze(axes = var_4919_axes_0, x = var_4917_cast_fp16)[name = string("op_4919_cast_fp16")]; tensor hidden_states_259_axes_0 = const()[name = string("hidden_states_259_axes_0"), val = tensor([0])]; tensor hidden_states_259_cast_fp16 = expand_dims(axes = hidden_states_259_axes_0, x = var_4919_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; fp16 var_4925_promoted_to_fp16 = const()[name = string("op_4925_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4931_cast_fp16 = pow(x = hidden_states_259_cast_fp16, y = var_4925_promoted_to_fp16)[name = string("op_4931_cast_fp16")]; tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_4931_cast_fp16)[name = string("variance_87_cast_fp16")]; fp16 var_4934_to_fp16 = const()[name = string("op_4934_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4935_cast_fp16 = add(x = variance_87_cast_fp16, y = var_4934_to_fp16)[name = string("op_4935_cast_fp16")]; fp32 var_4936_epsilon_0 = const()[name = string("op_4936_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4936_cast_fp16 = rsqrt(epsilon = var_4936_epsilon_0, x = var_4935_cast_fp16)[name = string("op_4936_cast_fp16")]; tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = var_4936_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; tensor const_110_to_fp16 = const()[name = string("const_110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689344)))]; tensor input_105_cast_fp16 = mul(x = const_110_to_fp16, y = hidden_states_263_cast_fp16)[name = string("input_105_cast_fp16")]; tensor layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163691456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837248))))[name = string("layers_10_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_4946_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_4946_cast_fp16")]; tensor layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169983616))))[name = string("layers_10_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_up_proj_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_109_cast_fp16 = mul(x = var_4946_cast_fp16, y = linear_75_cast_fp16)[name = string("input_109_cast_fp16")]; tensor layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169984192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173129984))))[name = string("layers_10_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor var_4953_axes_0 = const()[name = string("op_4953_axes_0"), val = tensor([0])]; tensor var_4953_cast_fp16 = squeeze(axes = var_4953_axes_0, x = linear_76_cast_fp16)[name = string("op_4953_cast_fp16")]; tensor var_4955_axes_0 = const()[name = string("op_4955_axes_0"), val = tensor([0])]; tensor var_4955_cast_fp16 = squeeze(axes = var_4955_axes_0, x = var_4953_cast_fp16)[name = string("op_4955_cast_fp16")]; tensor var_4957_axes_0 = const()[name = string("op_4957_axes_0"), val = tensor([-1])]; tensor var_4957_cast_fp16 = expand_dims(axes = var_4957_axes_0, x = var_4955_cast_fp16)[name = string("op_4957_cast_fp16")]; tensor mlp_4d_21_axes_0 = const()[name = string("mlp_4d_21_axes_0"), val = tensor([-1])]; tensor mlp_4d_21_cast_fp16 = expand_dims(axes = mlp_4d_21_axes_0, x = var_4957_cast_fp16)[name = string("mlp_4d_21_cast_fp16")]; tensor hidden_43_cast_fp16 = add(x = hidden_41_cast_fp16, y = mlp_4d_21_cast_fp16)[name = string("hidden_43_cast_fp16")]; tensor var_4971_begin_0 = const()[name = string("op_4971_begin_0"), val = tensor([0, 11264, 0, 0])]; tensor var_4971_end_0 = const()[name = string("op_4971_end_0"), val = tensor([1, 12288, 1, 256])]; tensor var_4971_end_mask_0 = const()[name = string("op_4971_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4971_cast_fp16 = slice_by_index(begin = var_4971_begin_0, end = var_4971_end_0, end_mask = var_4971_end_mask_0, x = key_cache)[name = string("op_4971_cast_fp16")]; tensor var_4991_begin_0 = const()[name = string("op_4991_begin_0"), val = tensor([0, 11264, 0, 0])]; tensor var_4991_end_0 = const()[name = string("op_4991_end_0"), val = tensor([1, 12288, 1, 256])]; tensor var_4991_end_mask_0 = const()[name = string("op_4991_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4991_cast_fp16 = slice_by_index(begin = var_4991_begin_0, end = var_4991_end_0, end_mask = var_4991_end_mask_0, x = value_cache)[name = string("op_4991_cast_fp16")]; tensor var_5003_axes_0 = const()[name = string("op_5003_axes_0"), val = tensor([-1])]; tensor var_5003_cast_fp16 = squeeze(axes = var_5003_axes_0, x = hidden_43_cast_fp16)[name = string("op_5003_cast_fp16")]; tensor var_5005_axes_0 = const()[name = string("op_5005_axes_0"), val = tensor([-1])]; tensor var_5005_cast_fp16 = squeeze(axes = var_5005_axes_0, x = var_5003_cast_fp16)[name = string("op_5005_cast_fp16")]; tensor hidden_states_265_axes_0 = const()[name = string("hidden_states_265_axes_0"), val = tensor([0])]; tensor hidden_states_265_cast_fp16 = expand_dims(axes = hidden_states_265_axes_0, x = var_5005_cast_fp16)[name = string("hidden_states_265_cast_fp16")]; fp16 var_5011_promoted_to_fp16 = const()[name = string("op_5011_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5017_cast_fp16 = pow(x = hidden_states_265_cast_fp16, y = var_5011_promoted_to_fp16)[name = string("op_5017_cast_fp16")]; tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_5017_cast_fp16)[name = string("variance_89_cast_fp16")]; fp16 var_5020_to_fp16 = const()[name = string("op_5020_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5021_cast_fp16 = add(x = variance_89_cast_fp16, y = var_5020_to_fp16)[name = string("op_5021_cast_fp16")]; fp32 var_5022_epsilon_0 = const()[name = string("op_5022_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5022_cast_fp16 = rsqrt(epsilon = var_5022_epsilon_0, x = var_5021_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor hidden_states_269_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = var_5022_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; tensor const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173130560)))]; tensor input_111_cast_fp16 = mul(x = const_111_to_fp16, y = hidden_states_269_cast_fp16)[name = string("input_111_cast_fp16")]; tensor layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173132672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175229888))))[name = string("layers_11_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175230464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279104))))[name = string("layers_11_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328320))))[name = string("layers_11_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor var_5039 = const()[name = string("op_5039"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_271_cast_fp16 = reshape(shape = var_5039, x = linear_77_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; tensor var_5045 = const()[name = string("op_5045"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_277_cast_fp16 = reshape(shape = var_5045, x = linear_78_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; tensor var_5051 = const()[name = string("op_5051"), val = tensor([1, 1, 8, 128])]; tensor v_69_cast_fp16 = reshape(shape = var_5051, x = linear_79_cast_fp16)[name = string("v_69_cast_fp16")]; fp16 var_5056_promoted_to_fp16 = const()[name = string("op_5056_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5062_cast_fp16 = pow(x = hidden_states_271_cast_fp16, y = var_5056_promoted_to_fp16)[name = string("op_5062_cast_fp16")]; tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_5062_cast_fp16)[name = string("variance_91_cast_fp16")]; fp16 var_5065_to_fp16 = const()[name = string("op_5065_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5066_cast_fp16 = add(x = variance_91_cast_fp16, y = var_5065_to_fp16)[name = string("op_5066_cast_fp16")]; fp32 var_5067_epsilon_0 = const()[name = string("op_5067_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5067_cast_fp16 = rsqrt(epsilon = var_5067_epsilon_0, x = var_5066_cast_fp16)[name = string("op_5067_cast_fp16")]; tensor hidden_states_275_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = var_5067_cast_fp16)[name = string("hidden_states_275_cast_fp16")]; tensor const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328896)))]; tensor q_91_cast_fp16 = mul(x = const_112_to_fp16, y = hidden_states_275_cast_fp16)[name = string("q_91_cast_fp16")]; fp16 var_5074_promoted_to_fp16 = const()[name = string("op_5074_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5080_cast_fp16 = pow(x = hidden_states_277_cast_fp16, y = var_5074_promoted_to_fp16)[name = string("op_5080_cast_fp16")]; tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_5080_cast_fp16)[name = string("variance_93_cast_fp16")]; fp16 var_5083_to_fp16 = const()[name = string("op_5083_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5084_cast_fp16 = add(x = variance_93_cast_fp16, y = var_5083_to_fp16)[name = string("op_5084_cast_fp16")]; fp32 var_5085_epsilon_0 = const()[name = string("op_5085_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5085_cast_fp16 = rsqrt(epsilon = var_5085_epsilon_0, x = var_5084_cast_fp16)[name = string("op_5085_cast_fp16")]; tensor hidden_states_281_cast_fp16 = mul(x = hidden_states_277_cast_fp16, y = var_5085_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; tensor const_113_to_fp16 = const()[name = string("const_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329216)))]; tensor k_91_cast_fp16 = mul(x = const_113_to_fp16, y = hidden_states_281_cast_fp16)[name = string("k_91_cast_fp16")]; tensor q_93_perm_0 = const()[name = string("q_93_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_93_perm_0 = const()[name = string("k_93_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_71_perm_0 = const()[name = string("v_71_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_93_cast_fp16 = transpose(perm = q_93_perm_0, x = q_91_cast_fp16)[name = string("transpose_67")]; tensor var_5102_cast_fp16 = mul(x = q_93_cast_fp16, y = cos_3_cast_fp16)[name = string("op_5102_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_93_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_93_cast_fp16)[name = string("x2_45_cast_fp16")]; fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5123_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_116_promoted_to_fp16)[name = string("op_5123_cast_fp16")]; int32 var_5125 = const()[name = string("op_5125"), val = int32(-1)]; bool var_5126_interleave_0 = const()[name = string("op_5126_interleave_0"), val = bool(false)]; tensor var_5126_cast_fp16 = concat(axis = var_5125, interleave = var_5126_interleave_0, values = (var_5123_cast_fp16, x1_45_cast_fp16))[name = string("op_5126_cast_fp16")]; tensor var_5127_cast_fp16 = mul(x = var_5126_cast_fp16, y = sin_3_cast_fp16)[name = string("op_5127_cast_fp16")]; tensor q_95_cast_fp16 = add(x = var_5102_cast_fp16, y = var_5127_cast_fp16)[name = string("q_95_cast_fp16")]; tensor k_93_cast_fp16 = transpose(perm = k_93_perm_0, x = k_91_cast_fp16)[name = string("transpose_66")]; tensor var_5130_cast_fp16 = mul(x = k_93_cast_fp16, y = cos_3_cast_fp16)[name = string("op_5130_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_93_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_93_cast_fp16)[name = string("x2_47_cast_fp16")]; fp16 const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5151_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_119_promoted_to_fp16)[name = string("op_5151_cast_fp16")]; int32 var_5153 = const()[name = string("op_5153"), val = int32(-1)]; bool var_5154_interleave_0 = const()[name = string("op_5154_interleave_0"), val = bool(false)]; tensor var_5154_cast_fp16 = concat(axis = var_5153, interleave = var_5154_interleave_0, values = (var_5151_cast_fp16, x1_47_cast_fp16))[name = string("op_5154_cast_fp16")]; tensor var_5155_cast_fp16 = mul(x = var_5154_cast_fp16, y = sin_3_cast_fp16)[name = string("op_5155_cast_fp16")]; tensor k_95_cast_fp16 = add(x = var_5130_cast_fp16, y = var_5155_cast_fp16)[name = string("k_95_cast_fp16")]; tensor var_5162 = const()[name = string("op_5162"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_23_cast_fp16 = reshape(shape = var_5162, x = k_95_cast_fp16)[name = string("nk_flat_23_cast_fp16")]; tensor var_5168 = const()[name = string("op_5168"), val = tensor([1, 1024, 1, 1])]; tensor v_71_cast_fp16 = transpose(perm = v_71_perm_0, x = v_69_cast_fp16)[name = string("transpose_65")]; tensor nv_flat_23_cast_fp16 = reshape(shape = var_5168, x = v_71_cast_fp16)[name = string("nv_flat_23_cast_fp16")]; tensor var_5177_cast_fp16 = mul(x = var_4971_cast_fp16, y = var_1194_cast_fp16)[name = string("op_5177_cast_fp16")]; tensor var_5178_cast_fp16 = mul(x = nk_flat_23_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_5178_cast_fp16")]; tensor key_cache_49_cast_fp16 = add(x = var_5177_cast_fp16, y = var_5178_cast_fp16)[name = string("key_cache_49_cast_fp16")]; tensor var_5184_cast_fp16 = mul(x = var_4991_cast_fp16, y = var_1194_cast_fp16)[name = string("op_5184_cast_fp16")]; tensor var_5185_cast_fp16 = mul(x = nv_flat_23_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_5185_cast_fp16")]; tensor value_cache_49_cast_fp16 = add(x = var_5184_cast_fp16, y = var_5185_cast_fp16)[name = string("value_cache_49_cast_fp16")]; tensor kc_67_axes_0 = const()[name = string("kc_67_axes_0"), val = tensor([2])]; tensor kc_67_cast_fp16 = squeeze(axes = kc_67_axes_0, x = key_cache_49_cast_fp16)[name = string("kc_67_cast_fp16")]; tensor var_5194 = const()[name = string("op_5194"), val = tensor([1, 8, 128, 256])]; tensor kc_69_cast_fp16 = reshape(shape = var_5194, x = kc_67_cast_fp16)[name = string("kc_69_cast_fp16")]; tensor vc_67_axes_0 = const()[name = string("vc_67_axes_0"), val = tensor([2])]; tensor vc_67_cast_fp16 = squeeze(axes = vc_67_axes_0, x = value_cache_49_cast_fp16)[name = string("vc_67_cast_fp16")]; tensor var_5202 = const()[name = string("op_5202"), val = tensor([1, 8, 128, 256])]; tensor vc_69_cast_fp16 = reshape(shape = var_5202, x = vc_67_cast_fp16)[name = string("vc_69_cast_fp16")]; tensor var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor([2])]; tensor var_5205_cast_fp16 = expand_dims(axes = var_5205_axes_0, x = kc_69_cast_fp16)[name = string("op_5205_cast_fp16")]; tensor var_5213_reps_0 = const()[name = string("op_5213_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5213_cast_fp16 = tile(reps = var_5213_reps_0, x = var_5205_cast_fp16)[name = string("op_5213_cast_fp16")]; tensor var_5218 = const()[name = string("op_5218"), val = tensor([1, 16, 128, 256])]; tensor kc_71_cast_fp16 = reshape(shape = var_5218, x = var_5213_cast_fp16)[name = string("kc_71_cast_fp16")]; tensor var_5221_axes_0 = const()[name = string("op_5221_axes_0"), val = tensor([2])]; tensor var_5221_cast_fp16 = expand_dims(axes = var_5221_axes_0, x = vc_69_cast_fp16)[name = string("op_5221_cast_fp16")]; tensor var_5229_reps_0 = const()[name = string("op_5229_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5229_cast_fp16 = tile(reps = var_5229_reps_0, x = var_5221_cast_fp16)[name = string("op_5229_cast_fp16")]; tensor var_5234 = const()[name = string("op_5234"), val = tensor([1, 16, 128, 256])]; tensor vc_71_cast_fp16 = reshape(shape = var_5234, x = var_5229_cast_fp16)[name = string("vc_71_cast_fp16")]; bool var_5236_transpose_x_0 = const()[name = string("op_5236_transpose_x_0"), val = bool(false)]; bool var_5236_transpose_y_0 = const()[name = string("op_5236_transpose_y_0"), val = bool(false)]; tensor var_5236_cast_fp16 = matmul(transpose_x = var_5236_transpose_x_0, transpose_y = var_5236_transpose_y_0, x = q_95_cast_fp16, y = kc_71_cast_fp16)[name = string("op_5236_cast_fp16")]; fp16 _inversed_attn_weights_89_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_89_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_89_cast_fp16 = mul(x = var_5236_cast_fp16, y = _inversed_attn_weights_89_y_0_to_fp16)[name = string("_inversed_attn_weights_89_cast_fp16")]; tensor attn_weights_91_cast_fp16 = add(x = _inversed_attn_weights_89_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_91_cast_fp16")]; int32 var_5250 = const()[name = string("op_5250"), val = int32(-1)]; tensor attn_weights_95_cast_fp16 = softmax(axis = var_5250, x = attn_weights_91_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; bool attn_output_45_transpose_x_1 = const()[name = string("attn_output_45_transpose_x_1"), val = bool(false)]; bool attn_output_45_transpose_y_1 = const()[name = string("attn_output_45_transpose_y_1"), val = bool(true)]; tensor attn_output_45_cast_fp16 = matmul(transpose_x = attn_output_45_transpose_x_1, transpose_y = attn_output_45_transpose_y_1, x = attn_weights_95_cast_fp16, y = vc_71_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_5259_perm_0 = const()[name = string("op_5259_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5263 = const()[name = string("op_5263"), val = tensor([1, 1, -1])]; tensor var_5259_cast_fp16 = transpose(perm = var_5259_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_64")]; tensor input_113_cast_fp16 = reshape(shape = var_5263, x = var_5259_cast_fp16)[name = string("input_113_cast_fp16")]; tensor layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179426752))))[name = string("layers_11_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor var_5269_axes_0 = const()[name = string("op_5269_axes_0"), val = tensor([0])]; tensor var_5269_cast_fp16 = squeeze(axes = var_5269_axes_0, x = linear_80_cast_fp16)[name = string("op_5269_cast_fp16")]; tensor var_5271_axes_0 = const()[name = string("op_5271_axes_0"), val = tensor([0])]; tensor var_5271_cast_fp16 = squeeze(axes = var_5271_axes_0, x = var_5269_cast_fp16)[name = string("op_5271_cast_fp16")]; tensor var_5273_axes_0 = const()[name = string("op_5273_axes_0"), val = tensor([-1])]; tensor var_5273_cast_fp16 = expand_dims(axes = var_5273_axes_0, x = var_5271_cast_fp16)[name = string("op_5273_cast_fp16")]; tensor attn_4d_23_axes_0 = const()[name = string("attn_4d_23_axes_0"), val = tensor([-1])]; tensor attn_4d_23_cast_fp16 = expand_dims(axes = attn_4d_23_axes_0, x = var_5273_cast_fp16)[name = string("attn_4d_23_cast_fp16")]; tensor hidden_45_cast_fp16 = add(x = hidden_43_cast_fp16, y = attn_4d_23_cast_fp16)[name = string("hidden_45_cast_fp16")]; tensor var_5279_axes_0 = const()[name = string("op_5279_axes_0"), val = tensor([-1])]; tensor var_5279_cast_fp16 = squeeze(axes = var_5279_axes_0, x = hidden_45_cast_fp16)[name = string("op_5279_cast_fp16")]; tensor var_5281_axes_0 = const()[name = string("op_5281_axes_0"), val = tensor([-1])]; tensor var_5281_cast_fp16 = squeeze(axes = var_5281_axes_0, x = var_5279_cast_fp16)[name = string("op_5281_cast_fp16")]; tensor hidden_states_283_axes_0 = const()[name = string("hidden_states_283_axes_0"), val = tensor([0])]; tensor hidden_states_283_cast_fp16 = expand_dims(axes = hidden_states_283_axes_0, x = var_5281_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; fp16 var_5287_promoted_to_fp16 = const()[name = string("op_5287_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5293_cast_fp16 = pow(x = hidden_states_283_cast_fp16, y = var_5287_promoted_to_fp16)[name = string("op_5293_cast_fp16")]; tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_5293_cast_fp16)[name = string("variance_95_cast_fp16")]; fp16 var_5296_to_fp16 = const()[name = string("op_5296_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5297_cast_fp16 = add(x = variance_95_cast_fp16, y = var_5296_to_fp16)[name = string("op_5297_cast_fp16")]; fp32 var_5298_epsilon_0 = const()[name = string("op_5298_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5298_cast_fp16 = rsqrt(epsilon = var_5298_epsilon_0, x = var_5297_cast_fp16)[name = string("op_5298_cast_fp16")]; tensor hidden_states_287_cast_fp16 = mul(x = hidden_states_283_cast_fp16, y = var_5298_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; tensor const_120_to_fp16 = const()[name = string("const_120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427328)))]; tensor input_115_cast_fp16 = mul(x = const_120_to_fp16, y = hidden_states_287_cast_fp16)[name = string("input_115_cast_fp16")]; tensor layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179429440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575232))))[name = string("layers_11_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_5308_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_5308_cast_fp16")]; tensor layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185721600))))[name = string("layers_11_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_up_proj_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_119_cast_fp16 = mul(x = var_5308_cast_fp16, y = linear_82_cast_fp16)[name = string("input_119_cast_fp16")]; tensor layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185722176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188867968))))[name = string("layers_11_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor var_5315_axes_0 = const()[name = string("op_5315_axes_0"), val = tensor([0])]; tensor var_5315_cast_fp16 = squeeze(axes = var_5315_axes_0, x = linear_83_cast_fp16)[name = string("op_5315_cast_fp16")]; tensor var_5317_axes_0 = const()[name = string("op_5317_axes_0"), val = tensor([0])]; tensor var_5317_cast_fp16 = squeeze(axes = var_5317_axes_0, x = var_5315_cast_fp16)[name = string("op_5317_cast_fp16")]; tensor var_5319_axes_0 = const()[name = string("op_5319_axes_0"), val = tensor([-1])]; tensor var_5319_cast_fp16 = expand_dims(axes = var_5319_axes_0, x = var_5317_cast_fp16)[name = string("op_5319_cast_fp16")]; tensor mlp_4d_23_axes_0 = const()[name = string("mlp_4d_23_axes_0"), val = tensor([-1])]; tensor mlp_4d_23_cast_fp16 = expand_dims(axes = mlp_4d_23_axes_0, x = var_5319_cast_fp16)[name = string("mlp_4d_23_cast_fp16")]; tensor hidden_47_cast_fp16 = add(x = hidden_45_cast_fp16, y = mlp_4d_23_cast_fp16)[name = string("hidden_47_cast_fp16")]; tensor var_5333_begin_0 = const()[name = string("op_5333_begin_0"), val = tensor([0, 12288, 0, 0])]; tensor var_5333_end_0 = const()[name = string("op_5333_end_0"), val = tensor([1, 13312, 1, 256])]; tensor var_5333_end_mask_0 = const()[name = string("op_5333_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5333_cast_fp16 = slice_by_index(begin = var_5333_begin_0, end = var_5333_end_0, end_mask = var_5333_end_mask_0, x = key_cache)[name = string("op_5333_cast_fp16")]; tensor var_5353_begin_0 = const()[name = string("op_5353_begin_0"), val = tensor([0, 12288, 0, 0])]; tensor var_5353_end_0 = const()[name = string("op_5353_end_0"), val = tensor([1, 13312, 1, 256])]; tensor var_5353_end_mask_0 = const()[name = string("op_5353_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5353_cast_fp16 = slice_by_index(begin = var_5353_begin_0, end = var_5353_end_0, end_mask = var_5353_end_mask_0, x = value_cache)[name = string("op_5353_cast_fp16")]; tensor var_5365_axes_0 = const()[name = string("op_5365_axes_0"), val = tensor([-1])]; tensor var_5365_cast_fp16 = squeeze(axes = var_5365_axes_0, x = hidden_47_cast_fp16)[name = string("op_5365_cast_fp16")]; tensor var_5367_axes_0 = const()[name = string("op_5367_axes_0"), val = tensor([-1])]; tensor var_5367_cast_fp16 = squeeze(axes = var_5367_axes_0, x = var_5365_cast_fp16)[name = string("op_5367_cast_fp16")]; tensor hidden_states_289_axes_0 = const()[name = string("hidden_states_289_axes_0"), val = tensor([0])]; tensor hidden_states_289_cast_fp16 = expand_dims(axes = hidden_states_289_axes_0, x = var_5367_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; fp16 var_5373_promoted_to_fp16 = const()[name = string("op_5373_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5379_cast_fp16 = pow(x = hidden_states_289_cast_fp16, y = var_5373_promoted_to_fp16)[name = string("op_5379_cast_fp16")]; tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_5379_cast_fp16)[name = string("variance_97_cast_fp16")]; fp16 var_5382_to_fp16 = const()[name = string("op_5382_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5383_cast_fp16 = add(x = variance_97_cast_fp16, y = var_5382_to_fp16)[name = string("op_5383_cast_fp16")]; fp32 var_5384_epsilon_0 = const()[name = string("op_5384_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5384_cast_fp16 = rsqrt(epsilon = var_5384_epsilon_0, x = var_5383_cast_fp16)[name = string("op_5384_cast_fp16")]; tensor hidden_states_293_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = var_5384_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; tensor const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188868544)))]; tensor input_121_cast_fp16 = mul(x = const_121_to_fp16, y = hidden_states_293_cast_fp16)[name = string("input_121_cast_fp16")]; tensor layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188870656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190967872))))[name = string("layers_12_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190968448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017088))))[name = string("layers_12_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066304))))[name = string("layers_12_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor var_5401 = const()[name = string("op_5401"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_295_cast_fp16 = reshape(shape = var_5401, x = linear_84_cast_fp16)[name = string("hidden_states_295_cast_fp16")]; tensor var_5407 = const()[name = string("op_5407"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_301_cast_fp16 = reshape(shape = var_5407, x = linear_85_cast_fp16)[name = string("hidden_states_301_cast_fp16")]; tensor var_5413 = const()[name = string("op_5413"), val = tensor([1, 1, 8, 128])]; tensor v_75_cast_fp16 = reshape(shape = var_5413, x = linear_86_cast_fp16)[name = string("v_75_cast_fp16")]; fp16 var_5418_promoted_to_fp16 = const()[name = string("op_5418_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5424_cast_fp16 = pow(x = hidden_states_295_cast_fp16, y = var_5418_promoted_to_fp16)[name = string("op_5424_cast_fp16")]; tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([-1])]; bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; tensor variance_99_cast_fp16 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = var_5424_cast_fp16)[name = string("variance_99_cast_fp16")]; fp16 var_5427_to_fp16 = const()[name = string("op_5427_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5428_cast_fp16 = add(x = variance_99_cast_fp16, y = var_5427_to_fp16)[name = string("op_5428_cast_fp16")]; fp32 var_5429_epsilon_0 = const()[name = string("op_5429_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5429_cast_fp16 = rsqrt(epsilon = var_5429_epsilon_0, x = var_5428_cast_fp16)[name = string("op_5429_cast_fp16")]; tensor hidden_states_299_cast_fp16 = mul(x = hidden_states_295_cast_fp16, y = var_5429_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; tensor const_122_to_fp16 = const()[name = string("const_122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066880)))]; tensor q_99_cast_fp16 = mul(x = const_122_to_fp16, y = hidden_states_299_cast_fp16)[name = string("q_99_cast_fp16")]; fp16 var_5436_promoted_to_fp16 = const()[name = string("op_5436_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5442_cast_fp16 = pow(x = hidden_states_301_cast_fp16, y = var_5436_promoted_to_fp16)[name = string("op_5442_cast_fp16")]; tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([-1])]; bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; tensor variance_101_cast_fp16 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = var_5442_cast_fp16)[name = string("variance_101_cast_fp16")]; fp16 var_5445_to_fp16 = const()[name = string("op_5445_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5446_cast_fp16 = add(x = variance_101_cast_fp16, y = var_5445_to_fp16)[name = string("op_5446_cast_fp16")]; fp32 var_5447_epsilon_0 = const()[name = string("op_5447_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5447_cast_fp16 = rsqrt(epsilon = var_5447_epsilon_0, x = var_5446_cast_fp16)[name = string("op_5447_cast_fp16")]; tensor hidden_states_305_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = var_5447_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; tensor const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067200)))]; tensor k_99_cast_fp16 = mul(x = const_123_to_fp16, y = hidden_states_305_cast_fp16)[name = string("k_99_cast_fp16")]; tensor q_101_perm_0 = const()[name = string("q_101_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_101_perm_0 = const()[name = string("k_101_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_77_perm_0 = const()[name = string("v_77_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_101_cast_fp16 = transpose(perm = q_101_perm_0, x = q_99_cast_fp16)[name = string("transpose_63")]; tensor var_5464_cast_fp16 = mul(x = q_101_cast_fp16, y = cos_3_cast_fp16)[name = string("op_5464_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_101_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_101_cast_fp16)[name = string("x2_49_cast_fp16")]; fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5485_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_5485_cast_fp16")]; int32 var_5487 = const()[name = string("op_5487"), val = int32(-1)]; bool var_5488_interleave_0 = const()[name = string("op_5488_interleave_0"), val = bool(false)]; tensor var_5488_cast_fp16 = concat(axis = var_5487, interleave = var_5488_interleave_0, values = (var_5485_cast_fp16, x1_49_cast_fp16))[name = string("op_5488_cast_fp16")]; tensor var_5489_cast_fp16 = mul(x = var_5488_cast_fp16, y = sin_3_cast_fp16)[name = string("op_5489_cast_fp16")]; tensor q_103_cast_fp16 = add(x = var_5464_cast_fp16, y = var_5489_cast_fp16)[name = string("q_103_cast_fp16")]; tensor k_101_cast_fp16 = transpose(perm = k_101_perm_0, x = k_99_cast_fp16)[name = string("transpose_62")]; tensor var_5492_cast_fp16 = mul(x = k_101_cast_fp16, y = cos_3_cast_fp16)[name = string("op_5492_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_101_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_101_cast_fp16)[name = string("x2_51_cast_fp16")]; fp16 const_129_promoted_to_fp16 = const()[name = string("const_129_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5513_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_129_promoted_to_fp16)[name = string("op_5513_cast_fp16")]; int32 var_5515 = const()[name = string("op_5515"), val = int32(-1)]; bool var_5516_interleave_0 = const()[name = string("op_5516_interleave_0"), val = bool(false)]; tensor var_5516_cast_fp16 = concat(axis = var_5515, interleave = var_5516_interleave_0, values = (var_5513_cast_fp16, x1_51_cast_fp16))[name = string("op_5516_cast_fp16")]; tensor var_5517_cast_fp16 = mul(x = var_5516_cast_fp16, y = sin_3_cast_fp16)[name = string("op_5517_cast_fp16")]; tensor k_103_cast_fp16 = add(x = var_5492_cast_fp16, y = var_5517_cast_fp16)[name = string("k_103_cast_fp16")]; tensor var_5524 = const()[name = string("op_5524"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_25_cast_fp16 = reshape(shape = var_5524, x = k_103_cast_fp16)[name = string("nk_flat_25_cast_fp16")]; tensor var_5530 = const()[name = string("op_5530"), val = tensor([1, 1024, 1, 1])]; tensor v_77_cast_fp16 = transpose(perm = v_77_perm_0, x = v_75_cast_fp16)[name = string("transpose_61")]; tensor nv_flat_25_cast_fp16 = reshape(shape = var_5530, x = v_77_cast_fp16)[name = string("nv_flat_25_cast_fp16")]; tensor var_5539_cast_fp16 = mul(x = var_5333_cast_fp16, y = var_1194_cast_fp16)[name = string("op_5539_cast_fp16")]; tensor var_5540_cast_fp16 = mul(x = nk_flat_25_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_5540_cast_fp16")]; tensor key_cache_53_cast_fp16 = add(x = var_5539_cast_fp16, y = var_5540_cast_fp16)[name = string("key_cache_53_cast_fp16")]; tensor var_5546_cast_fp16 = mul(x = var_5353_cast_fp16, y = var_1194_cast_fp16)[name = string("op_5546_cast_fp16")]; tensor var_5547_cast_fp16 = mul(x = nv_flat_25_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_5547_cast_fp16")]; tensor value_cache_53_cast_fp16 = add(x = var_5546_cast_fp16, y = var_5547_cast_fp16)[name = string("value_cache_53_cast_fp16")]; tensor kc_73_axes_0 = const()[name = string("kc_73_axes_0"), val = tensor([2])]; tensor kc_73_cast_fp16 = squeeze(axes = kc_73_axes_0, x = key_cache_53_cast_fp16)[name = string("kc_73_cast_fp16")]; tensor var_5556 = const()[name = string("op_5556"), val = tensor([1, 8, 128, 256])]; tensor kc_75_cast_fp16 = reshape(shape = var_5556, x = kc_73_cast_fp16)[name = string("kc_75_cast_fp16")]; tensor vc_73_axes_0 = const()[name = string("vc_73_axes_0"), val = tensor([2])]; tensor vc_73_cast_fp16 = squeeze(axes = vc_73_axes_0, x = value_cache_53_cast_fp16)[name = string("vc_73_cast_fp16")]; tensor var_5564 = const()[name = string("op_5564"), val = tensor([1, 8, 128, 256])]; tensor vc_75_cast_fp16 = reshape(shape = var_5564, x = vc_73_cast_fp16)[name = string("vc_75_cast_fp16")]; tensor var_5567_axes_0 = const()[name = string("op_5567_axes_0"), val = tensor([2])]; tensor var_5567_cast_fp16 = expand_dims(axes = var_5567_axes_0, x = kc_75_cast_fp16)[name = string("op_5567_cast_fp16")]; tensor var_5575_reps_0 = const()[name = string("op_5575_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5575_cast_fp16 = tile(reps = var_5575_reps_0, x = var_5567_cast_fp16)[name = string("op_5575_cast_fp16")]; tensor var_5580 = const()[name = string("op_5580"), val = tensor([1, 16, 128, 256])]; tensor kc_77_cast_fp16 = reshape(shape = var_5580, x = var_5575_cast_fp16)[name = string("kc_77_cast_fp16")]; tensor var_5583_axes_0 = const()[name = string("op_5583_axes_0"), val = tensor([2])]; tensor var_5583_cast_fp16 = expand_dims(axes = var_5583_axes_0, x = vc_75_cast_fp16)[name = string("op_5583_cast_fp16")]; tensor var_5591_reps_0 = const()[name = string("op_5591_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5591_cast_fp16 = tile(reps = var_5591_reps_0, x = var_5583_cast_fp16)[name = string("op_5591_cast_fp16")]; tensor var_5596 = const()[name = string("op_5596"), val = tensor([1, 16, 128, 256])]; tensor vc_77_cast_fp16 = reshape(shape = var_5596, x = var_5591_cast_fp16)[name = string("vc_77_cast_fp16")]; bool var_5598_transpose_x_0 = const()[name = string("op_5598_transpose_x_0"), val = bool(false)]; bool var_5598_transpose_y_0 = const()[name = string("op_5598_transpose_y_0"), val = bool(false)]; tensor var_5598_cast_fp16 = matmul(transpose_x = var_5598_transpose_x_0, transpose_y = var_5598_transpose_y_0, x = q_103_cast_fp16, y = kc_77_cast_fp16)[name = string("op_5598_cast_fp16")]; fp16 _inversed_attn_weights_97_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_97_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_97_cast_fp16 = mul(x = var_5598_cast_fp16, y = _inversed_attn_weights_97_y_0_to_fp16)[name = string("_inversed_attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = _inversed_attn_weights_97_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_99_cast_fp16")]; int32 var_5612 = const()[name = string("op_5612"), val = int32(-1)]; tensor attn_weights_103_cast_fp16 = softmax(axis = var_5612, x = attn_weights_99_cast_fp16)[name = string("attn_weights_103_cast_fp16")]; bool attn_output_49_transpose_x_1 = const()[name = string("attn_output_49_transpose_x_1"), val = bool(false)]; bool attn_output_49_transpose_y_1 = const()[name = string("attn_output_49_transpose_y_1"), val = bool(true)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_1, transpose_y = attn_output_49_transpose_y_1, x = attn_weights_103_cast_fp16, y = vc_77_cast_fp16)[name = string("attn_output_49_cast_fp16")]; tensor var_5621_perm_0 = const()[name = string("op_5621_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5625 = const()[name = string("op_5625"), val = tensor([1, 1, -1])]; tensor var_5621_cast_fp16 = transpose(perm = var_5621_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_60")]; tensor input_123_cast_fp16 = reshape(shape = var_5625, x = var_5621_cast_fp16)[name = string("input_123_cast_fp16")]; tensor layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195164736))))[name = string("layers_12_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor var_5631_axes_0 = const()[name = string("op_5631_axes_0"), val = tensor([0])]; tensor var_5631_cast_fp16 = squeeze(axes = var_5631_axes_0, x = linear_87_cast_fp16)[name = string("op_5631_cast_fp16")]; tensor var_5633_axes_0 = const()[name = string("op_5633_axes_0"), val = tensor([0])]; tensor var_5633_cast_fp16 = squeeze(axes = var_5633_axes_0, x = var_5631_cast_fp16)[name = string("op_5633_cast_fp16")]; tensor var_5635_axes_0 = const()[name = string("op_5635_axes_0"), val = tensor([-1])]; tensor var_5635_cast_fp16 = expand_dims(axes = var_5635_axes_0, x = var_5633_cast_fp16)[name = string("op_5635_cast_fp16")]; tensor attn_4d_25_axes_0 = const()[name = string("attn_4d_25_axes_0"), val = tensor([-1])]; tensor attn_4d_25_cast_fp16 = expand_dims(axes = attn_4d_25_axes_0, x = var_5635_cast_fp16)[name = string("attn_4d_25_cast_fp16")]; tensor hidden_49_cast_fp16 = add(x = hidden_47_cast_fp16, y = attn_4d_25_cast_fp16)[name = string("hidden_49_cast_fp16")]; tensor var_5641_axes_0 = const()[name = string("op_5641_axes_0"), val = tensor([-1])]; tensor var_5641_cast_fp16 = squeeze(axes = var_5641_axes_0, x = hidden_49_cast_fp16)[name = string("op_5641_cast_fp16")]; tensor var_5643_axes_0 = const()[name = string("op_5643_axes_0"), val = tensor([-1])]; tensor var_5643_cast_fp16 = squeeze(axes = var_5643_axes_0, x = var_5641_cast_fp16)[name = string("op_5643_cast_fp16")]; tensor hidden_states_307_axes_0 = const()[name = string("hidden_states_307_axes_0"), val = tensor([0])]; tensor hidden_states_307_cast_fp16 = expand_dims(axes = hidden_states_307_axes_0, x = var_5643_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; fp16 var_5649_promoted_to_fp16 = const()[name = string("op_5649_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5655_cast_fp16 = pow(x = hidden_states_307_cast_fp16, y = var_5649_promoted_to_fp16)[name = string("op_5655_cast_fp16")]; tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_5655_cast_fp16)[name = string("variance_103_cast_fp16")]; fp16 var_5658_to_fp16 = const()[name = string("op_5658_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5659_cast_fp16 = add(x = variance_103_cast_fp16, y = var_5658_to_fp16)[name = string("op_5659_cast_fp16")]; fp32 var_5660_epsilon_0 = const()[name = string("op_5660_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5660_cast_fp16 = rsqrt(epsilon = var_5660_epsilon_0, x = var_5659_cast_fp16)[name = string("op_5660_cast_fp16")]; tensor hidden_states_311_cast_fp16 = mul(x = hidden_states_307_cast_fp16, y = var_5660_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; tensor const_130_to_fp16 = const()[name = string("const_130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195165312)))]; tensor input_125_cast_fp16 = mul(x = const_130_to_fp16, y = hidden_states_311_cast_fp16)[name = string("input_125_cast_fp16")]; tensor layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195167424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313216))))[name = string("layers_12_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = input_125_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_5670_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_5670_cast_fp16")]; tensor layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201459584))))[name = string("layers_12_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_up_proj_weight_to_fp16_palettized, x = input_125_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_129_cast_fp16 = mul(x = var_5670_cast_fp16, y = linear_89_cast_fp16)[name = string("input_129_cast_fp16")]; tensor layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201460160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204605952))))[name = string("layers_12_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor var_5677_axes_0 = const()[name = string("op_5677_axes_0"), val = tensor([0])]; tensor var_5677_cast_fp16 = squeeze(axes = var_5677_axes_0, x = linear_90_cast_fp16)[name = string("op_5677_cast_fp16")]; tensor var_5679_axes_0 = const()[name = string("op_5679_axes_0"), val = tensor([0])]; tensor var_5679_cast_fp16 = squeeze(axes = var_5679_axes_0, x = var_5677_cast_fp16)[name = string("op_5679_cast_fp16")]; tensor var_5681_axes_0 = const()[name = string("op_5681_axes_0"), val = tensor([-1])]; tensor var_5681_cast_fp16 = expand_dims(axes = var_5681_axes_0, x = var_5679_cast_fp16)[name = string("op_5681_cast_fp16")]; tensor mlp_4d_25_axes_0 = const()[name = string("mlp_4d_25_axes_0"), val = tensor([-1])]; tensor mlp_4d_25_cast_fp16 = expand_dims(axes = mlp_4d_25_axes_0, x = var_5681_cast_fp16)[name = string("mlp_4d_25_cast_fp16")]; tensor hidden_51_cast_fp16 = add(x = hidden_49_cast_fp16, y = mlp_4d_25_cast_fp16)[name = string("hidden_51_cast_fp16")]; tensor var_5695_begin_0 = const()[name = string("op_5695_begin_0"), val = tensor([0, 13312, 0, 0])]; tensor var_5695_end_0 = const()[name = string("op_5695_end_0"), val = tensor([1, 14336, 1, 256])]; tensor var_5695_end_mask_0 = const()[name = string("op_5695_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5695_cast_fp16 = slice_by_index(begin = var_5695_begin_0, end = var_5695_end_0, end_mask = var_5695_end_mask_0, x = key_cache)[name = string("op_5695_cast_fp16")]; tensor var_5715_begin_0 = const()[name = string("op_5715_begin_0"), val = tensor([0, 13312, 0, 0])]; tensor var_5715_end_0 = const()[name = string("op_5715_end_0"), val = tensor([1, 14336, 1, 256])]; tensor var_5715_end_mask_0 = const()[name = string("op_5715_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5715_cast_fp16 = slice_by_index(begin = var_5715_begin_0, end = var_5715_end_0, end_mask = var_5715_end_mask_0, x = value_cache)[name = string("op_5715_cast_fp16")]; tensor var_5727_axes_0 = const()[name = string("op_5727_axes_0"), val = tensor([-1])]; tensor var_5727_cast_fp16 = squeeze(axes = var_5727_axes_0, x = hidden_51_cast_fp16)[name = string("op_5727_cast_fp16")]; tensor var_5729_axes_0 = const()[name = string("op_5729_axes_0"), val = tensor([-1])]; tensor var_5729_cast_fp16 = squeeze(axes = var_5729_axes_0, x = var_5727_cast_fp16)[name = string("op_5729_cast_fp16")]; tensor hidden_states_313_axes_0 = const()[name = string("hidden_states_313_axes_0"), val = tensor([0])]; tensor hidden_states_313_cast_fp16 = expand_dims(axes = hidden_states_313_axes_0, x = var_5729_cast_fp16)[name = string("hidden_states_313_cast_fp16")]; fp16 var_5735_promoted_to_fp16 = const()[name = string("op_5735_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5741_cast_fp16 = pow(x = hidden_states_313_cast_fp16, y = var_5735_promoted_to_fp16)[name = string("op_5741_cast_fp16")]; tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_5741_cast_fp16)[name = string("variance_105_cast_fp16")]; fp16 var_5744_to_fp16 = const()[name = string("op_5744_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5745_cast_fp16 = add(x = variance_105_cast_fp16, y = var_5744_to_fp16)[name = string("op_5745_cast_fp16")]; fp32 var_5746_epsilon_0 = const()[name = string("op_5746_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5746_cast_fp16 = rsqrt(epsilon = var_5746_epsilon_0, x = var_5745_cast_fp16)[name = string("op_5746_cast_fp16")]; tensor hidden_states_317_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = var_5746_cast_fp16)[name = string("hidden_states_317_cast_fp16")]; tensor const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204606528)))]; tensor input_131_cast_fp16 = mul(x = const_131_to_fp16, y = hidden_states_317_cast_fp16)[name = string("input_131_cast_fp16")]; tensor layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204608640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206705856))))[name = string("layers_13_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206706432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755072))))[name = string("layers_13_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804288))))[name = string("layers_13_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor var_5763 = const()[name = string("op_5763"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_319_cast_fp16 = reshape(shape = var_5763, x = linear_91_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; tensor var_5769 = const()[name = string("op_5769"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_325_cast_fp16 = reshape(shape = var_5769, x = linear_92_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; tensor var_5775 = const()[name = string("op_5775"), val = tensor([1, 1, 8, 128])]; tensor v_81_cast_fp16 = reshape(shape = var_5775, x = linear_93_cast_fp16)[name = string("v_81_cast_fp16")]; fp16 var_5780_promoted_to_fp16 = const()[name = string("op_5780_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5786_cast_fp16 = pow(x = hidden_states_319_cast_fp16, y = var_5780_promoted_to_fp16)[name = string("op_5786_cast_fp16")]; tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([-1])]; bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; tensor variance_107_cast_fp16 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = var_5786_cast_fp16)[name = string("variance_107_cast_fp16")]; fp16 var_5789_to_fp16 = const()[name = string("op_5789_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5790_cast_fp16 = add(x = variance_107_cast_fp16, y = var_5789_to_fp16)[name = string("op_5790_cast_fp16")]; fp32 var_5791_epsilon_0 = const()[name = string("op_5791_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5791_cast_fp16 = rsqrt(epsilon = var_5791_epsilon_0, x = var_5790_cast_fp16)[name = string("op_5791_cast_fp16")]; tensor hidden_states_323_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = var_5791_cast_fp16)[name = string("hidden_states_323_cast_fp16")]; tensor const_132_to_fp16 = const()[name = string("const_132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804864)))]; tensor q_107_cast_fp16 = mul(x = const_132_to_fp16, y = hidden_states_323_cast_fp16)[name = string("q_107_cast_fp16")]; fp16 var_5798_promoted_to_fp16 = const()[name = string("op_5798_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5804_cast_fp16 = pow(x = hidden_states_325_cast_fp16, y = var_5798_promoted_to_fp16)[name = string("op_5804_cast_fp16")]; tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([-1])]; bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; tensor variance_109_cast_fp16 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = var_5804_cast_fp16)[name = string("variance_109_cast_fp16")]; fp16 var_5807_to_fp16 = const()[name = string("op_5807_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5808_cast_fp16 = add(x = variance_109_cast_fp16, y = var_5807_to_fp16)[name = string("op_5808_cast_fp16")]; fp32 var_5809_epsilon_0 = const()[name = string("op_5809_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5809_cast_fp16 = rsqrt(epsilon = var_5809_epsilon_0, x = var_5808_cast_fp16)[name = string("op_5809_cast_fp16")]; tensor hidden_states_329_cast_fp16 = mul(x = hidden_states_325_cast_fp16, y = var_5809_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; tensor const_133_to_fp16 = const()[name = string("const_133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805184)))]; tensor k_107_cast_fp16 = mul(x = const_133_to_fp16, y = hidden_states_329_cast_fp16)[name = string("k_107_cast_fp16")]; tensor q_109_perm_0 = const()[name = string("q_109_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_109_perm_0 = const()[name = string("k_109_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_83_perm_0 = const()[name = string("v_83_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_109_cast_fp16 = transpose(perm = q_109_perm_0, x = q_107_cast_fp16)[name = string("transpose_59")]; tensor var_5826_cast_fp16 = mul(x = q_109_cast_fp16, y = cos_3_cast_fp16)[name = string("op_5826_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_109_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_109_cast_fp16)[name = string("x2_53_cast_fp16")]; fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5847_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_5847_cast_fp16")]; int32 var_5849 = const()[name = string("op_5849"), val = int32(-1)]; bool var_5850_interleave_0 = const()[name = string("op_5850_interleave_0"), val = bool(false)]; tensor var_5850_cast_fp16 = concat(axis = var_5849, interleave = var_5850_interleave_0, values = (var_5847_cast_fp16, x1_53_cast_fp16))[name = string("op_5850_cast_fp16")]; tensor var_5851_cast_fp16 = mul(x = var_5850_cast_fp16, y = sin_3_cast_fp16)[name = string("op_5851_cast_fp16")]; tensor q_111_cast_fp16 = add(x = var_5826_cast_fp16, y = var_5851_cast_fp16)[name = string("q_111_cast_fp16")]; tensor k_109_cast_fp16 = transpose(perm = k_109_perm_0, x = k_107_cast_fp16)[name = string("transpose_58")]; tensor var_5854_cast_fp16 = mul(x = k_109_cast_fp16, y = cos_3_cast_fp16)[name = string("op_5854_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_109_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_109_cast_fp16)[name = string("x2_55_cast_fp16")]; fp16 const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5875_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_139_promoted_to_fp16)[name = string("op_5875_cast_fp16")]; int32 var_5877 = const()[name = string("op_5877"), val = int32(-1)]; bool var_5878_interleave_0 = const()[name = string("op_5878_interleave_0"), val = bool(false)]; tensor var_5878_cast_fp16 = concat(axis = var_5877, interleave = var_5878_interleave_0, values = (var_5875_cast_fp16, x1_55_cast_fp16))[name = string("op_5878_cast_fp16")]; tensor var_5879_cast_fp16 = mul(x = var_5878_cast_fp16, y = sin_3_cast_fp16)[name = string("op_5879_cast_fp16")]; tensor k_111_cast_fp16 = add(x = var_5854_cast_fp16, y = var_5879_cast_fp16)[name = string("k_111_cast_fp16")]; tensor var_5886 = const()[name = string("op_5886"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_27_cast_fp16 = reshape(shape = var_5886, x = k_111_cast_fp16)[name = string("nk_flat_27_cast_fp16")]; tensor var_5892 = const()[name = string("op_5892"), val = tensor([1, 1024, 1, 1])]; tensor v_83_cast_fp16 = transpose(perm = v_83_perm_0, x = v_81_cast_fp16)[name = string("transpose_57")]; tensor nv_flat_27_cast_fp16 = reshape(shape = var_5892, x = v_83_cast_fp16)[name = string("nv_flat_27_cast_fp16")]; tensor var_5901_cast_fp16 = mul(x = var_5695_cast_fp16, y = var_1194_cast_fp16)[name = string("op_5901_cast_fp16")]; tensor var_5902_cast_fp16 = mul(x = nk_flat_27_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_5902_cast_fp16")]; tensor key_cache_57_cast_fp16 = add(x = var_5901_cast_fp16, y = var_5902_cast_fp16)[name = string("key_cache_57_cast_fp16")]; tensor var_5908_cast_fp16 = mul(x = var_5715_cast_fp16, y = var_1194_cast_fp16)[name = string("op_5908_cast_fp16")]; tensor var_5909_cast_fp16 = mul(x = nv_flat_27_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_5909_cast_fp16")]; tensor value_cache_57_cast_fp16 = add(x = var_5908_cast_fp16, y = var_5909_cast_fp16)[name = string("value_cache_57_cast_fp16")]; tensor kc_79_axes_0 = const()[name = string("kc_79_axes_0"), val = tensor([2])]; tensor kc_79_cast_fp16 = squeeze(axes = kc_79_axes_0, x = key_cache_57_cast_fp16)[name = string("kc_79_cast_fp16")]; tensor var_5918 = const()[name = string("op_5918"), val = tensor([1, 8, 128, 256])]; tensor kc_81_cast_fp16 = reshape(shape = var_5918, x = kc_79_cast_fp16)[name = string("kc_81_cast_fp16")]; tensor vc_79_axes_0 = const()[name = string("vc_79_axes_0"), val = tensor([2])]; tensor vc_79_cast_fp16 = squeeze(axes = vc_79_axes_0, x = value_cache_57_cast_fp16)[name = string("vc_79_cast_fp16")]; tensor var_5926 = const()[name = string("op_5926"), val = tensor([1, 8, 128, 256])]; tensor vc_81_cast_fp16 = reshape(shape = var_5926, x = vc_79_cast_fp16)[name = string("vc_81_cast_fp16")]; tensor var_5929_axes_0 = const()[name = string("op_5929_axes_0"), val = tensor([2])]; tensor var_5929_cast_fp16 = expand_dims(axes = var_5929_axes_0, x = kc_81_cast_fp16)[name = string("op_5929_cast_fp16")]; tensor var_5937_reps_0 = const()[name = string("op_5937_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5937_cast_fp16 = tile(reps = var_5937_reps_0, x = var_5929_cast_fp16)[name = string("op_5937_cast_fp16")]; tensor var_5942 = const()[name = string("op_5942"), val = tensor([1, 16, 128, 256])]; tensor kc_83_cast_fp16 = reshape(shape = var_5942, x = var_5937_cast_fp16)[name = string("kc_83_cast_fp16")]; tensor var_5945_axes_0 = const()[name = string("op_5945_axes_0"), val = tensor([2])]; tensor var_5945_cast_fp16 = expand_dims(axes = var_5945_axes_0, x = vc_81_cast_fp16)[name = string("op_5945_cast_fp16")]; tensor var_5953_reps_0 = const()[name = string("op_5953_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5953_cast_fp16 = tile(reps = var_5953_reps_0, x = var_5945_cast_fp16)[name = string("op_5953_cast_fp16")]; tensor var_5958 = const()[name = string("op_5958"), val = tensor([1, 16, 128, 256])]; tensor vc_83_cast_fp16 = reshape(shape = var_5958, x = var_5953_cast_fp16)[name = string("vc_83_cast_fp16")]; bool var_5960_transpose_x_0 = const()[name = string("op_5960_transpose_x_0"), val = bool(false)]; bool var_5960_transpose_y_0 = const()[name = string("op_5960_transpose_y_0"), val = bool(false)]; tensor var_5960_cast_fp16 = matmul(transpose_x = var_5960_transpose_x_0, transpose_y = var_5960_transpose_y_0, x = q_111_cast_fp16, y = kc_83_cast_fp16)[name = string("op_5960_cast_fp16")]; fp16 _inversed_attn_weights_105_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_105_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_105_cast_fp16 = mul(x = var_5960_cast_fp16, y = _inversed_attn_weights_105_y_0_to_fp16)[name = string("_inversed_attn_weights_105_cast_fp16")]; tensor attn_weights_107_cast_fp16 = add(x = _inversed_attn_weights_105_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; int32 var_5974 = const()[name = string("op_5974"), val = int32(-1)]; tensor attn_weights_111_cast_fp16 = softmax(axis = var_5974, x = attn_weights_107_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; bool attn_output_53_transpose_x_1 = const()[name = string("attn_output_53_transpose_x_1"), val = bool(false)]; bool attn_output_53_transpose_y_1 = const()[name = string("attn_output_53_transpose_y_1"), val = bool(true)]; tensor attn_output_53_cast_fp16 = matmul(transpose_x = attn_output_53_transpose_x_1, transpose_y = attn_output_53_transpose_y_1, x = attn_weights_111_cast_fp16, y = vc_83_cast_fp16)[name = string("attn_output_53_cast_fp16")]; tensor var_5983_perm_0 = const()[name = string("op_5983_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5987 = const()[name = string("op_5987"), val = tensor([1, 1, -1])]; tensor var_5983_cast_fp16 = transpose(perm = var_5983_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_56")]; tensor input_133_cast_fp16 = reshape(shape = var_5987, x = var_5983_cast_fp16)[name = string("input_133_cast_fp16")]; tensor layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210902720))))[name = string("layers_13_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor var_5993_axes_0 = const()[name = string("op_5993_axes_0"), val = tensor([0])]; tensor var_5993_cast_fp16 = squeeze(axes = var_5993_axes_0, x = linear_94_cast_fp16)[name = string("op_5993_cast_fp16")]; tensor var_5995_axes_0 = const()[name = string("op_5995_axes_0"), val = tensor([0])]; tensor var_5995_cast_fp16 = squeeze(axes = var_5995_axes_0, x = var_5993_cast_fp16)[name = string("op_5995_cast_fp16")]; tensor var_5997_axes_0 = const()[name = string("op_5997_axes_0"), val = tensor([-1])]; tensor var_5997_cast_fp16 = expand_dims(axes = var_5997_axes_0, x = var_5995_cast_fp16)[name = string("op_5997_cast_fp16")]; tensor attn_4d_27_axes_0 = const()[name = string("attn_4d_27_axes_0"), val = tensor([-1])]; tensor attn_4d_27_cast_fp16 = expand_dims(axes = attn_4d_27_axes_0, x = var_5997_cast_fp16)[name = string("attn_4d_27_cast_fp16")]; tensor hidden_53_cast_fp16 = add(x = hidden_51_cast_fp16, y = attn_4d_27_cast_fp16)[name = string("hidden_53_cast_fp16")]; tensor var_6003_axes_0 = const()[name = string("op_6003_axes_0"), val = tensor([-1])]; tensor var_6003_cast_fp16 = squeeze(axes = var_6003_axes_0, x = hidden_53_cast_fp16)[name = string("op_6003_cast_fp16")]; tensor var_6005_axes_0 = const()[name = string("op_6005_axes_0"), val = tensor([-1])]; tensor var_6005_cast_fp16 = squeeze(axes = var_6005_axes_0, x = var_6003_cast_fp16)[name = string("op_6005_cast_fp16")]; tensor hidden_states_331_axes_0 = const()[name = string("hidden_states_331_axes_0"), val = tensor([0])]; tensor hidden_states_331_cast_fp16 = expand_dims(axes = hidden_states_331_axes_0, x = var_6005_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; fp16 var_6011_promoted_to_fp16 = const()[name = string("op_6011_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6017_cast_fp16 = pow(x = hidden_states_331_cast_fp16, y = var_6011_promoted_to_fp16)[name = string("op_6017_cast_fp16")]; tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_6017_cast_fp16)[name = string("variance_111_cast_fp16")]; fp16 var_6020_to_fp16 = const()[name = string("op_6020_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6021_cast_fp16 = add(x = variance_111_cast_fp16, y = var_6020_to_fp16)[name = string("op_6021_cast_fp16")]; fp32 var_6022_epsilon_0 = const()[name = string("op_6022_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6022_cast_fp16 = rsqrt(epsilon = var_6022_epsilon_0, x = var_6021_cast_fp16)[name = string("op_6022_cast_fp16")]; tensor hidden_states_335_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = var_6022_cast_fp16)[name = string("hidden_states_335_cast_fp16")]; tensor const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210903296)))]; tensor input_135_cast_fp16 = mul(x = const_140_to_fp16, y = hidden_states_335_cast_fp16)[name = string("input_135_cast_fp16")]; tensor layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210905408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051200))))[name = string("layers_13_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_6032_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_6032_cast_fp16")]; tensor layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217197568))))[name = string("layers_13_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_up_proj_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_139_cast_fp16 = mul(x = var_6032_cast_fp16, y = linear_96_cast_fp16)[name = string("input_139_cast_fp16")]; tensor layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217198144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220343936))))[name = string("layers_13_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor var_6039_axes_0 = const()[name = string("op_6039_axes_0"), val = tensor([0])]; tensor var_6039_cast_fp16 = squeeze(axes = var_6039_axes_0, x = linear_97_cast_fp16)[name = string("op_6039_cast_fp16")]; tensor var_6041_axes_0 = const()[name = string("op_6041_axes_0"), val = tensor([0])]; tensor var_6041_cast_fp16 = squeeze(axes = var_6041_axes_0, x = var_6039_cast_fp16)[name = string("op_6041_cast_fp16")]; tensor var_6043_axes_0 = const()[name = string("op_6043_axes_0"), val = tensor([-1])]; tensor var_6043_cast_fp16 = expand_dims(axes = var_6043_axes_0, x = var_6041_cast_fp16)[name = string("op_6043_cast_fp16")]; tensor mlp_4d_27_axes_0 = const()[name = string("mlp_4d_27_axes_0"), val = tensor([-1])]; tensor mlp_4d_27_cast_fp16 = expand_dims(axes = mlp_4d_27_axes_0, x = var_6043_cast_fp16)[name = string("mlp_4d_27_cast_fp16")]; tensor hidden_55_cast_fp16 = add(x = hidden_53_cast_fp16, y = mlp_4d_27_cast_fp16)[name = string("hidden_55_cast_fp16")]; tensor var_6057_begin_0 = const()[name = string("op_6057_begin_0"), val = tensor([0, 14336, 0, 0])]; tensor var_6057_end_0 = const()[name = string("op_6057_end_0"), val = tensor([1, 15360, 1, 256])]; tensor var_6057_end_mask_0 = const()[name = string("op_6057_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6057_cast_fp16 = slice_by_index(begin = var_6057_begin_0, end = var_6057_end_0, end_mask = var_6057_end_mask_0, x = key_cache)[name = string("op_6057_cast_fp16")]; tensor var_6077_begin_0 = const()[name = string("op_6077_begin_0"), val = tensor([0, 14336, 0, 0])]; tensor var_6077_end_0 = const()[name = string("op_6077_end_0"), val = tensor([1, 15360, 1, 256])]; tensor var_6077_end_mask_0 = const()[name = string("op_6077_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6077_cast_fp16 = slice_by_index(begin = var_6077_begin_0, end = var_6077_end_0, end_mask = var_6077_end_mask_0, x = value_cache)[name = string("op_6077_cast_fp16")]; tensor var_6089_axes_0 = const()[name = string("op_6089_axes_0"), val = tensor([-1])]; tensor var_6089_cast_fp16 = squeeze(axes = var_6089_axes_0, x = hidden_55_cast_fp16)[name = string("op_6089_cast_fp16")]; tensor var_6091_axes_0 = const()[name = string("op_6091_axes_0"), val = tensor([-1])]; tensor var_6091_cast_fp16 = squeeze(axes = var_6091_axes_0, x = var_6089_cast_fp16)[name = string("op_6091_cast_fp16")]; tensor hidden_states_337_axes_0 = const()[name = string("hidden_states_337_axes_0"), val = tensor([0])]; tensor hidden_states_337_cast_fp16 = expand_dims(axes = hidden_states_337_axes_0, x = var_6091_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; fp16 var_6097_promoted_to_fp16 = const()[name = string("op_6097_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6103_cast_fp16 = pow(x = hidden_states_337_cast_fp16, y = var_6097_promoted_to_fp16)[name = string("op_6103_cast_fp16")]; tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_6103_cast_fp16)[name = string("variance_113_cast_fp16")]; fp16 var_6106_to_fp16 = const()[name = string("op_6106_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6107_cast_fp16 = add(x = variance_113_cast_fp16, y = var_6106_to_fp16)[name = string("op_6107_cast_fp16")]; fp32 var_6108_epsilon_0 = const()[name = string("op_6108_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6108_cast_fp16 = rsqrt(epsilon = var_6108_epsilon_0, x = var_6107_cast_fp16)[name = string("op_6108_cast_fp16")]; tensor hidden_states_341_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = var_6108_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; tensor const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220344512)))]; tensor input_141_cast_fp16 = mul(x = const_141_to_fp16, y = hidden_states_341_cast_fp16)[name = string("input_141_cast_fp16")]; tensor layers_14_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220346624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222443840))))[name = string("layers_14_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_14_self_attn_q_proj_weight_to_fp16_palettized, x = input_141_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor layers_14_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222444416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223493056))))[name = string("layers_14_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_k_proj_weight_to_fp16_palettized, x = input_141_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor layers_14_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223493632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224542272))))[name = string("layers_14_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_v_proj_weight_to_fp16_palettized, x = input_141_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor var_6125 = const()[name = string("op_6125"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_343_cast_fp16 = reshape(shape = var_6125, x = linear_98_cast_fp16)[name = string("hidden_states_343_cast_fp16")]; tensor var_6131 = const()[name = string("op_6131"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_349_cast_fp16 = reshape(shape = var_6131, x = linear_99_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; tensor var_6137 = const()[name = string("op_6137"), val = tensor([1, 1, 8, 128])]; tensor v_87_cast_fp16 = reshape(shape = var_6137, x = linear_100_cast_fp16)[name = string("v_87_cast_fp16")]; fp16 var_6142_promoted_to_fp16 = const()[name = string("op_6142_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6148_cast_fp16 = pow(x = hidden_states_343_cast_fp16, y = var_6142_promoted_to_fp16)[name = string("op_6148_cast_fp16")]; tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([-1])]; bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; tensor variance_115_cast_fp16 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = var_6148_cast_fp16)[name = string("variance_115_cast_fp16")]; fp16 var_6151_to_fp16 = const()[name = string("op_6151_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6152_cast_fp16 = add(x = variance_115_cast_fp16, y = var_6151_to_fp16)[name = string("op_6152_cast_fp16")]; fp32 var_6153_epsilon_0 = const()[name = string("op_6153_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6153_cast_fp16 = rsqrt(epsilon = var_6153_epsilon_0, x = var_6152_cast_fp16)[name = string("op_6153_cast_fp16")]; tensor hidden_states_347_cast_fp16 = mul(x = hidden_states_343_cast_fp16, y = var_6153_cast_fp16)[name = string("hidden_states_347_cast_fp16")]; tensor const_142_to_fp16 = const()[name = string("const_142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224542848)))]; tensor q_115_cast_fp16 = mul(x = const_142_to_fp16, y = hidden_states_347_cast_fp16)[name = string("q_115_cast_fp16")]; fp16 var_6160_promoted_to_fp16 = const()[name = string("op_6160_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6166_cast_fp16 = pow(x = hidden_states_349_cast_fp16, y = var_6160_promoted_to_fp16)[name = string("op_6166_cast_fp16")]; tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([-1])]; bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; tensor variance_117_cast_fp16 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = var_6166_cast_fp16)[name = string("variance_117_cast_fp16")]; fp16 var_6169_to_fp16 = const()[name = string("op_6169_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6170_cast_fp16 = add(x = variance_117_cast_fp16, y = var_6169_to_fp16)[name = string("op_6170_cast_fp16")]; fp32 var_6171_epsilon_0 = const()[name = string("op_6171_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6171_cast_fp16 = rsqrt(epsilon = var_6171_epsilon_0, x = var_6170_cast_fp16)[name = string("op_6171_cast_fp16")]; tensor hidden_states_353_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = var_6171_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; tensor const_143_to_fp16 = const()[name = string("const_143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224543168)))]; tensor k_115_cast_fp16 = mul(x = const_143_to_fp16, y = hidden_states_353_cast_fp16)[name = string("k_115_cast_fp16")]; tensor q_117_perm_0 = const()[name = string("q_117_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_117_perm_0 = const()[name = string("k_117_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_89_perm_0 = const()[name = string("v_89_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_117_cast_fp16 = transpose(perm = q_117_perm_0, x = q_115_cast_fp16)[name = string("transpose_55")]; tensor var_6188_cast_fp16 = mul(x = q_117_cast_fp16, y = cos_3_cast_fp16)[name = string("op_6188_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_117_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_117_cast_fp16)[name = string("x2_57_cast_fp16")]; fp16 const_146_promoted_to_fp16 = const()[name = string("const_146_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6209_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_146_promoted_to_fp16)[name = string("op_6209_cast_fp16")]; int32 var_6211 = const()[name = string("op_6211"), val = int32(-1)]; bool var_6212_interleave_0 = const()[name = string("op_6212_interleave_0"), val = bool(false)]; tensor var_6212_cast_fp16 = concat(axis = var_6211, interleave = var_6212_interleave_0, values = (var_6209_cast_fp16, x1_57_cast_fp16))[name = string("op_6212_cast_fp16")]; tensor var_6213_cast_fp16 = mul(x = var_6212_cast_fp16, y = sin_3_cast_fp16)[name = string("op_6213_cast_fp16")]; tensor q_119_cast_fp16 = add(x = var_6188_cast_fp16, y = var_6213_cast_fp16)[name = string("q_119_cast_fp16")]; tensor k_117_cast_fp16 = transpose(perm = k_117_perm_0, x = k_115_cast_fp16)[name = string("transpose_54")]; tensor var_6216_cast_fp16 = mul(x = k_117_cast_fp16, y = cos_3_cast_fp16)[name = string("op_6216_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_117_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_117_cast_fp16)[name = string("x2_59_cast_fp16")]; fp16 const_149_promoted_to_fp16 = const()[name = string("const_149_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6237_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_149_promoted_to_fp16)[name = string("op_6237_cast_fp16")]; int32 var_6239 = const()[name = string("op_6239"), val = int32(-1)]; bool var_6240_interleave_0 = const()[name = string("op_6240_interleave_0"), val = bool(false)]; tensor var_6240_cast_fp16 = concat(axis = var_6239, interleave = var_6240_interleave_0, values = (var_6237_cast_fp16, x1_59_cast_fp16))[name = string("op_6240_cast_fp16")]; tensor var_6241_cast_fp16 = mul(x = var_6240_cast_fp16, y = sin_3_cast_fp16)[name = string("op_6241_cast_fp16")]; tensor k_119_cast_fp16 = add(x = var_6216_cast_fp16, y = var_6241_cast_fp16)[name = string("k_119_cast_fp16")]; tensor var_6248 = const()[name = string("op_6248"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_29_cast_fp16 = reshape(shape = var_6248, x = k_119_cast_fp16)[name = string("nk_flat_29_cast_fp16")]; tensor var_6254 = const()[name = string("op_6254"), val = tensor([1, 1024, 1, 1])]; tensor v_89_cast_fp16 = transpose(perm = v_89_perm_0, x = v_87_cast_fp16)[name = string("transpose_53")]; tensor nv_flat_29_cast_fp16 = reshape(shape = var_6254, x = v_89_cast_fp16)[name = string("nv_flat_29_cast_fp16")]; tensor var_6263_cast_fp16 = mul(x = var_6057_cast_fp16, y = var_1194_cast_fp16)[name = string("op_6263_cast_fp16")]; tensor var_6264_cast_fp16 = mul(x = nk_flat_29_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_6264_cast_fp16")]; tensor key_cache_61_cast_fp16 = add(x = var_6263_cast_fp16, y = var_6264_cast_fp16)[name = string("key_cache_61_cast_fp16")]; tensor var_6270_cast_fp16 = mul(x = var_6077_cast_fp16, y = var_1194_cast_fp16)[name = string("op_6270_cast_fp16")]; tensor var_6271_cast_fp16 = mul(x = nv_flat_29_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_6271_cast_fp16")]; tensor value_cache_61_cast_fp16 = add(x = var_6270_cast_fp16, y = var_6271_cast_fp16)[name = string("value_cache_61_cast_fp16")]; tensor kc_85_axes_0 = const()[name = string("kc_85_axes_0"), val = tensor([2])]; tensor kc_85_cast_fp16 = squeeze(axes = kc_85_axes_0, x = key_cache_61_cast_fp16)[name = string("kc_85_cast_fp16")]; tensor var_6280 = const()[name = string("op_6280"), val = tensor([1, 8, 128, 256])]; tensor kc_87_cast_fp16 = reshape(shape = var_6280, x = kc_85_cast_fp16)[name = string("kc_87_cast_fp16")]; tensor vc_85_axes_0 = const()[name = string("vc_85_axes_0"), val = tensor([2])]; tensor vc_85_cast_fp16 = squeeze(axes = vc_85_axes_0, x = value_cache_61_cast_fp16)[name = string("vc_85_cast_fp16")]; tensor var_6288 = const()[name = string("op_6288"), val = tensor([1, 8, 128, 256])]; tensor vc_87_cast_fp16 = reshape(shape = var_6288, x = vc_85_cast_fp16)[name = string("vc_87_cast_fp16")]; tensor var_6291_axes_0 = const()[name = string("op_6291_axes_0"), val = tensor([2])]; tensor var_6291_cast_fp16 = expand_dims(axes = var_6291_axes_0, x = kc_87_cast_fp16)[name = string("op_6291_cast_fp16")]; tensor var_6299_reps_0 = const()[name = string("op_6299_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6299_cast_fp16 = tile(reps = var_6299_reps_0, x = var_6291_cast_fp16)[name = string("op_6299_cast_fp16")]; tensor var_6304 = const()[name = string("op_6304"), val = tensor([1, 16, 128, 256])]; tensor kc_89_cast_fp16 = reshape(shape = var_6304, x = var_6299_cast_fp16)[name = string("kc_89_cast_fp16")]; tensor var_6307_axes_0 = const()[name = string("op_6307_axes_0"), val = tensor([2])]; tensor var_6307_cast_fp16 = expand_dims(axes = var_6307_axes_0, x = vc_87_cast_fp16)[name = string("op_6307_cast_fp16")]; tensor var_6315_reps_0 = const()[name = string("op_6315_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6315_cast_fp16 = tile(reps = var_6315_reps_0, x = var_6307_cast_fp16)[name = string("op_6315_cast_fp16")]; tensor var_6320 = const()[name = string("op_6320"), val = tensor([1, 16, 128, 256])]; tensor vc_89_cast_fp16 = reshape(shape = var_6320, x = var_6315_cast_fp16)[name = string("vc_89_cast_fp16")]; bool var_6322_transpose_x_0 = const()[name = string("op_6322_transpose_x_0"), val = bool(false)]; bool var_6322_transpose_y_0 = const()[name = string("op_6322_transpose_y_0"), val = bool(false)]; tensor var_6322_cast_fp16 = matmul(transpose_x = var_6322_transpose_x_0, transpose_y = var_6322_transpose_y_0, x = q_119_cast_fp16, y = kc_89_cast_fp16)[name = string("op_6322_cast_fp16")]; fp16 _inversed_attn_weights_113_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_113_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_113_cast_fp16 = mul(x = var_6322_cast_fp16, y = _inversed_attn_weights_113_y_0_to_fp16)[name = string("_inversed_attn_weights_113_cast_fp16")]; tensor attn_weights_115_cast_fp16 = add(x = _inversed_attn_weights_113_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_115_cast_fp16")]; int32 var_6336 = const()[name = string("op_6336"), val = int32(-1)]; tensor attn_weights_119_cast_fp16 = softmax(axis = var_6336, x = attn_weights_115_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; bool attn_output_57_transpose_x_1 = const()[name = string("attn_output_57_transpose_x_1"), val = bool(false)]; bool attn_output_57_transpose_y_1 = const()[name = string("attn_output_57_transpose_y_1"), val = bool(true)]; tensor attn_output_57_cast_fp16 = matmul(transpose_x = attn_output_57_transpose_x_1, transpose_y = attn_output_57_transpose_y_1, x = attn_weights_119_cast_fp16, y = vc_89_cast_fp16)[name = string("attn_output_57_cast_fp16")]; tensor var_6345_perm_0 = const()[name = string("op_6345_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6349 = const()[name = string("op_6349"), val = tensor([1, 1, -1])]; tensor var_6345_cast_fp16 = transpose(perm = var_6345_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_52")]; tensor input_143_cast_fp16 = reshape(shape = var_6349, x = var_6345_cast_fp16)[name = string("input_143_cast_fp16")]; tensor layers_14_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224543488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226640704))))[name = string("layers_14_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_101_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_o_proj_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor var_6355_axes_0 = const()[name = string("op_6355_axes_0"), val = tensor([0])]; tensor var_6355_cast_fp16 = squeeze(axes = var_6355_axes_0, x = linear_101_cast_fp16)[name = string("op_6355_cast_fp16")]; tensor var_6357_axes_0 = const()[name = string("op_6357_axes_0"), val = tensor([0])]; tensor var_6357_cast_fp16 = squeeze(axes = var_6357_axes_0, x = var_6355_cast_fp16)[name = string("op_6357_cast_fp16")]; tensor var_6359_axes_0 = const()[name = string("op_6359_axes_0"), val = tensor([-1])]; tensor var_6359_cast_fp16 = expand_dims(axes = var_6359_axes_0, x = var_6357_cast_fp16)[name = string("op_6359_cast_fp16")]; tensor attn_4d_29_axes_0 = const()[name = string("attn_4d_29_axes_0"), val = tensor([-1])]; tensor attn_4d_29_cast_fp16 = expand_dims(axes = attn_4d_29_axes_0, x = var_6359_cast_fp16)[name = string("attn_4d_29_cast_fp16")]; tensor hidden_57_cast_fp16 = add(x = hidden_55_cast_fp16, y = attn_4d_29_cast_fp16)[name = string("hidden_57_cast_fp16")]; tensor var_6365_axes_0 = const()[name = string("op_6365_axes_0"), val = tensor([-1])]; tensor var_6365_cast_fp16 = squeeze(axes = var_6365_axes_0, x = hidden_57_cast_fp16)[name = string("op_6365_cast_fp16")]; tensor var_6367_axes_0 = const()[name = string("op_6367_axes_0"), val = tensor([-1])]; tensor var_6367_cast_fp16 = squeeze(axes = var_6367_axes_0, x = var_6365_cast_fp16)[name = string("op_6367_cast_fp16")]; tensor hidden_states_355_axes_0 = const()[name = string("hidden_states_355_axes_0"), val = tensor([0])]; tensor hidden_states_355_cast_fp16 = expand_dims(axes = hidden_states_355_axes_0, x = var_6367_cast_fp16)[name = string("hidden_states_355_cast_fp16")]; fp16 var_6373_promoted_to_fp16 = const()[name = string("op_6373_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6379_cast_fp16 = pow(x = hidden_states_355_cast_fp16, y = var_6373_promoted_to_fp16)[name = string("op_6379_cast_fp16")]; tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_6379_cast_fp16)[name = string("variance_119_cast_fp16")]; fp16 var_6382_to_fp16 = const()[name = string("op_6382_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6383_cast_fp16 = add(x = variance_119_cast_fp16, y = var_6382_to_fp16)[name = string("op_6383_cast_fp16")]; fp32 var_6384_epsilon_0 = const()[name = string("op_6384_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6384_cast_fp16 = rsqrt(epsilon = var_6384_epsilon_0, x = var_6383_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor hidden_states_359_cast_fp16 = mul(x = hidden_states_355_cast_fp16, y = var_6384_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; tensor const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226641280)))]; tensor input_145_cast_fp16 = mul(x = const_150_to_fp16, y = hidden_states_359_cast_fp16)[name = string("input_145_cast_fp16")]; tensor layers_14_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226643392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229789184))))[name = string("layers_14_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_gate_proj_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = string("linear_102_cast_fp16")]; tensor var_6394_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_6394_cast_fp16")]; tensor layers_14_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229789760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232935552))))[name = string("layers_14_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_up_proj_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor input_149_cast_fp16 = mul(x = var_6394_cast_fp16, y = linear_103_cast_fp16)[name = string("input_149_cast_fp16")]; tensor layers_14_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232936128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236081920))))[name = string("layers_14_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_104_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_mlp_down_proj_weight_to_fp16_palettized, x = input_149_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor var_6401_axes_0 = const()[name = string("op_6401_axes_0"), val = tensor([0])]; tensor var_6401_cast_fp16 = squeeze(axes = var_6401_axes_0, x = linear_104_cast_fp16)[name = string("op_6401_cast_fp16")]; tensor var_6403_axes_0 = const()[name = string("op_6403_axes_0"), val = tensor([0])]; tensor var_6403_cast_fp16 = squeeze(axes = var_6403_axes_0, x = var_6401_cast_fp16)[name = string("op_6403_cast_fp16")]; tensor var_6405_axes_0 = const()[name = string("op_6405_axes_0"), val = tensor([-1])]; tensor var_6405_cast_fp16 = expand_dims(axes = var_6405_axes_0, x = var_6403_cast_fp16)[name = string("op_6405_cast_fp16")]; tensor mlp_4d_29_axes_0 = const()[name = string("mlp_4d_29_axes_0"), val = tensor([-1])]; tensor mlp_4d_29_cast_fp16 = expand_dims(axes = mlp_4d_29_axes_0, x = var_6405_cast_fp16)[name = string("mlp_4d_29_cast_fp16")]; tensor hidden_59_cast_fp16 = add(x = hidden_57_cast_fp16, y = mlp_4d_29_cast_fp16)[name = string("hidden_59_cast_fp16")]; tensor var_6419_begin_0 = const()[name = string("op_6419_begin_0"), val = tensor([0, 15360, 0, 0])]; tensor var_6419_end_0 = const()[name = string("op_6419_end_0"), val = tensor([1, 16384, 1, 256])]; tensor var_6419_end_mask_0 = const()[name = string("op_6419_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6419_cast_fp16 = slice_by_index(begin = var_6419_begin_0, end = var_6419_end_0, end_mask = var_6419_end_mask_0, x = key_cache)[name = string("op_6419_cast_fp16")]; tensor var_6439_begin_0 = const()[name = string("op_6439_begin_0"), val = tensor([0, 15360, 0, 0])]; tensor var_6439_end_0 = const()[name = string("op_6439_end_0"), val = tensor([1, 16384, 1, 256])]; tensor var_6439_end_mask_0 = const()[name = string("op_6439_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6439_cast_fp16 = slice_by_index(begin = var_6439_begin_0, end = var_6439_end_0, end_mask = var_6439_end_mask_0, x = value_cache)[name = string("op_6439_cast_fp16")]; tensor var_6451_axes_0 = const()[name = string("op_6451_axes_0"), val = tensor([-1])]; tensor var_6451_cast_fp16 = squeeze(axes = var_6451_axes_0, x = hidden_59_cast_fp16)[name = string("op_6451_cast_fp16")]; tensor var_6453_axes_0 = const()[name = string("op_6453_axes_0"), val = tensor([-1])]; tensor var_6453_cast_fp16 = squeeze(axes = var_6453_axes_0, x = var_6451_cast_fp16)[name = string("op_6453_cast_fp16")]; tensor hidden_states_361_axes_0 = const()[name = string("hidden_states_361_axes_0"), val = tensor([0])]; tensor hidden_states_361_cast_fp16 = expand_dims(axes = hidden_states_361_axes_0, x = var_6453_cast_fp16)[name = string("hidden_states_361_cast_fp16")]; fp16 var_6459_promoted_to_fp16 = const()[name = string("op_6459_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6465_cast_fp16 = pow(x = hidden_states_361_cast_fp16, y = var_6459_promoted_to_fp16)[name = string("op_6465_cast_fp16")]; tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([-1])]; bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; tensor variance_121_cast_fp16 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = var_6465_cast_fp16)[name = string("variance_121_cast_fp16")]; fp16 var_6468_to_fp16 = const()[name = string("op_6468_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6469_cast_fp16 = add(x = variance_121_cast_fp16, y = var_6468_to_fp16)[name = string("op_6469_cast_fp16")]; fp32 var_6470_epsilon_0 = const()[name = string("op_6470_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6470_cast_fp16 = rsqrt(epsilon = var_6470_epsilon_0, x = var_6469_cast_fp16)[name = string("op_6470_cast_fp16")]; tensor hidden_states_365_cast_fp16 = mul(x = hidden_states_361_cast_fp16, y = var_6470_cast_fp16)[name = string("hidden_states_365_cast_fp16")]; tensor const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236082496)))]; tensor input_151_cast_fp16 = mul(x = const_151_to_fp16, y = hidden_states_365_cast_fp16)[name = string("input_151_cast_fp16")]; tensor layers_15_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236084608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238181824))))[name = string("layers_15_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_15_self_attn_q_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor layers_15_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238182400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239231040))))[name = string("layers_15_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_k_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor layers_15_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239231616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240280256))))[name = string("layers_15_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_v_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor var_6487 = const()[name = string("op_6487"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_367_cast_fp16 = reshape(shape = var_6487, x = linear_105_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; tensor var_6493 = const()[name = string("op_6493"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_373_cast_fp16 = reshape(shape = var_6493, x = linear_106_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; tensor var_6499 = const()[name = string("op_6499"), val = tensor([1, 1, 8, 128])]; tensor v_93_cast_fp16 = reshape(shape = var_6499, x = linear_107_cast_fp16)[name = string("v_93_cast_fp16")]; fp16 var_6504_promoted_to_fp16 = const()[name = string("op_6504_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6510_cast_fp16 = pow(x = hidden_states_367_cast_fp16, y = var_6504_promoted_to_fp16)[name = string("op_6510_cast_fp16")]; tensor variance_123_axes_0 = const()[name = string("variance_123_axes_0"), val = tensor([-1])]; bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; tensor variance_123_cast_fp16 = reduce_mean(axes = variance_123_axes_0, keep_dims = variance_123_keep_dims_0, x = var_6510_cast_fp16)[name = string("variance_123_cast_fp16")]; fp16 var_6513_to_fp16 = const()[name = string("op_6513_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6514_cast_fp16 = add(x = variance_123_cast_fp16, y = var_6513_to_fp16)[name = string("op_6514_cast_fp16")]; fp32 var_6515_epsilon_0 = const()[name = string("op_6515_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6515_cast_fp16 = rsqrt(epsilon = var_6515_epsilon_0, x = var_6514_cast_fp16)[name = string("op_6515_cast_fp16")]; tensor hidden_states_371_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = var_6515_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; tensor const_152_to_fp16 = const()[name = string("const_152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240280832)))]; tensor q_123_cast_fp16 = mul(x = const_152_to_fp16, y = hidden_states_371_cast_fp16)[name = string("q_123_cast_fp16")]; fp16 var_6522_promoted_to_fp16 = const()[name = string("op_6522_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6528_cast_fp16 = pow(x = hidden_states_373_cast_fp16, y = var_6522_promoted_to_fp16)[name = string("op_6528_cast_fp16")]; tensor variance_125_axes_0 = const()[name = string("variance_125_axes_0"), val = tensor([-1])]; bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; tensor variance_125_cast_fp16 = reduce_mean(axes = variance_125_axes_0, keep_dims = variance_125_keep_dims_0, x = var_6528_cast_fp16)[name = string("variance_125_cast_fp16")]; fp16 var_6531_to_fp16 = const()[name = string("op_6531_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6532_cast_fp16 = add(x = variance_125_cast_fp16, y = var_6531_to_fp16)[name = string("op_6532_cast_fp16")]; fp32 var_6533_epsilon_0 = const()[name = string("op_6533_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6533_cast_fp16 = rsqrt(epsilon = var_6533_epsilon_0, x = var_6532_cast_fp16)[name = string("op_6533_cast_fp16")]; tensor hidden_states_377_cast_fp16 = mul(x = hidden_states_373_cast_fp16, y = var_6533_cast_fp16)[name = string("hidden_states_377_cast_fp16")]; tensor const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240281152)))]; tensor k_123_cast_fp16 = mul(x = const_153_to_fp16, y = hidden_states_377_cast_fp16)[name = string("k_123_cast_fp16")]; tensor q_125_perm_0 = const()[name = string("q_125_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_125_perm_0 = const()[name = string("k_125_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_95_perm_0 = const()[name = string("v_95_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_125_cast_fp16 = transpose(perm = q_125_perm_0, x = q_123_cast_fp16)[name = string("transpose_51")]; tensor var_6550_cast_fp16 = mul(x = q_125_cast_fp16, y = cos_3_cast_fp16)[name = string("op_6550_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_125_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_125_cast_fp16)[name = string("x2_61_cast_fp16")]; fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6571_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_6571_cast_fp16")]; int32 var_6573 = const()[name = string("op_6573"), val = int32(-1)]; bool var_6574_interleave_0 = const()[name = string("op_6574_interleave_0"), val = bool(false)]; tensor var_6574_cast_fp16 = concat(axis = var_6573, interleave = var_6574_interleave_0, values = (var_6571_cast_fp16, x1_61_cast_fp16))[name = string("op_6574_cast_fp16")]; tensor var_6575_cast_fp16 = mul(x = var_6574_cast_fp16, y = sin_3_cast_fp16)[name = string("op_6575_cast_fp16")]; tensor q_127_cast_fp16 = add(x = var_6550_cast_fp16, y = var_6575_cast_fp16)[name = string("q_127_cast_fp16")]; tensor k_125_cast_fp16 = transpose(perm = k_125_perm_0, x = k_123_cast_fp16)[name = string("transpose_50")]; tensor var_6578_cast_fp16 = mul(x = k_125_cast_fp16, y = cos_3_cast_fp16)[name = string("op_6578_cast_fp16")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_125_cast_fp16)[name = string("x1_63_cast_fp16")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_125_cast_fp16)[name = string("x2_63_cast_fp16")]; fp16 const_159_promoted_to_fp16 = const()[name = string("const_159_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6599_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_159_promoted_to_fp16)[name = string("op_6599_cast_fp16")]; int32 var_6601 = const()[name = string("op_6601"), val = int32(-1)]; bool var_6602_interleave_0 = const()[name = string("op_6602_interleave_0"), val = bool(false)]; tensor var_6602_cast_fp16 = concat(axis = var_6601, interleave = var_6602_interleave_0, values = (var_6599_cast_fp16, x1_63_cast_fp16))[name = string("op_6602_cast_fp16")]; tensor var_6603_cast_fp16 = mul(x = var_6602_cast_fp16, y = sin_3_cast_fp16)[name = string("op_6603_cast_fp16")]; tensor k_127_cast_fp16 = add(x = var_6578_cast_fp16, y = var_6603_cast_fp16)[name = string("k_127_cast_fp16")]; tensor var_6610 = const()[name = string("op_6610"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_31_cast_fp16 = reshape(shape = var_6610, x = k_127_cast_fp16)[name = string("nk_flat_31_cast_fp16")]; tensor var_6616 = const()[name = string("op_6616"), val = tensor([1, 1024, 1, 1])]; tensor v_95_cast_fp16 = transpose(perm = v_95_perm_0, x = v_93_cast_fp16)[name = string("transpose_49")]; tensor nv_flat_31_cast_fp16 = reshape(shape = var_6616, x = v_95_cast_fp16)[name = string("nv_flat_31_cast_fp16")]; tensor var_6625_cast_fp16 = mul(x = var_6419_cast_fp16, y = var_1194_cast_fp16)[name = string("op_6625_cast_fp16")]; tensor var_6626_cast_fp16 = mul(x = nk_flat_31_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_6626_cast_fp16")]; tensor key_cache_65_cast_fp16 = add(x = var_6625_cast_fp16, y = var_6626_cast_fp16)[name = string("key_cache_65_cast_fp16")]; tensor var_6632_cast_fp16 = mul(x = var_6439_cast_fp16, y = var_1194_cast_fp16)[name = string("op_6632_cast_fp16")]; tensor var_6633_cast_fp16 = mul(x = nv_flat_31_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_6633_cast_fp16")]; tensor value_cache_65_cast_fp16 = add(x = var_6632_cast_fp16, y = var_6633_cast_fp16)[name = string("value_cache_65_cast_fp16")]; tensor kc_91_axes_0 = const()[name = string("kc_91_axes_0"), val = tensor([2])]; tensor kc_91_cast_fp16 = squeeze(axes = kc_91_axes_0, x = key_cache_65_cast_fp16)[name = string("kc_91_cast_fp16")]; tensor var_6642 = const()[name = string("op_6642"), val = tensor([1, 8, 128, 256])]; tensor kc_93_cast_fp16 = reshape(shape = var_6642, x = kc_91_cast_fp16)[name = string("kc_93_cast_fp16")]; tensor vc_91_axes_0 = const()[name = string("vc_91_axes_0"), val = tensor([2])]; tensor vc_91_cast_fp16 = squeeze(axes = vc_91_axes_0, x = value_cache_65_cast_fp16)[name = string("vc_91_cast_fp16")]; tensor var_6650 = const()[name = string("op_6650"), val = tensor([1, 8, 128, 256])]; tensor vc_93_cast_fp16 = reshape(shape = var_6650, x = vc_91_cast_fp16)[name = string("vc_93_cast_fp16")]; tensor var_6653_axes_0 = const()[name = string("op_6653_axes_0"), val = tensor([2])]; tensor var_6653_cast_fp16 = expand_dims(axes = var_6653_axes_0, x = kc_93_cast_fp16)[name = string("op_6653_cast_fp16")]; tensor var_6661_reps_0 = const()[name = string("op_6661_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6661_cast_fp16 = tile(reps = var_6661_reps_0, x = var_6653_cast_fp16)[name = string("op_6661_cast_fp16")]; tensor var_6666 = const()[name = string("op_6666"), val = tensor([1, 16, 128, 256])]; tensor kc_95_cast_fp16 = reshape(shape = var_6666, x = var_6661_cast_fp16)[name = string("kc_95_cast_fp16")]; tensor var_6669_axes_0 = const()[name = string("op_6669_axes_0"), val = tensor([2])]; tensor var_6669_cast_fp16 = expand_dims(axes = var_6669_axes_0, x = vc_93_cast_fp16)[name = string("op_6669_cast_fp16")]; tensor var_6677_reps_0 = const()[name = string("op_6677_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6677_cast_fp16 = tile(reps = var_6677_reps_0, x = var_6669_cast_fp16)[name = string("op_6677_cast_fp16")]; tensor var_6682 = const()[name = string("op_6682"), val = tensor([1, 16, 128, 256])]; tensor vc_95_cast_fp16 = reshape(shape = var_6682, x = var_6677_cast_fp16)[name = string("vc_95_cast_fp16")]; bool var_6684_transpose_x_0 = const()[name = string("op_6684_transpose_x_0"), val = bool(false)]; bool var_6684_transpose_y_0 = const()[name = string("op_6684_transpose_y_0"), val = bool(false)]; tensor var_6684_cast_fp16 = matmul(transpose_x = var_6684_transpose_x_0, transpose_y = var_6684_transpose_y_0, x = q_127_cast_fp16, y = kc_95_cast_fp16)[name = string("op_6684_cast_fp16")]; fp16 _inversed_attn_weights_121_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_121_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_121_cast_fp16 = mul(x = var_6684_cast_fp16, y = _inversed_attn_weights_121_y_0_to_fp16)[name = string("_inversed_attn_weights_121_cast_fp16")]; tensor attn_weights_123_cast_fp16 = add(x = _inversed_attn_weights_121_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_123_cast_fp16")]; int32 var_6698 = const()[name = string("op_6698"), val = int32(-1)]; tensor attn_weights_127_cast_fp16 = softmax(axis = var_6698, x = attn_weights_123_cast_fp16)[name = string("attn_weights_127_cast_fp16")]; bool attn_output_61_transpose_x_1 = const()[name = string("attn_output_61_transpose_x_1"), val = bool(false)]; bool attn_output_61_transpose_y_1 = const()[name = string("attn_output_61_transpose_y_1"), val = bool(true)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_1, transpose_y = attn_output_61_transpose_y_1, x = attn_weights_127_cast_fp16, y = vc_95_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_6707_perm_0 = const()[name = string("op_6707_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6711 = const()[name = string("op_6711"), val = tensor([1, 1, -1])]; tensor var_6707_cast_fp16 = transpose(perm = var_6707_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_48")]; tensor input_153_cast_fp16 = reshape(shape = var_6711, x = var_6707_cast_fp16)[name = string("input_153_cast_fp16")]; tensor layers_15_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240281472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242378688))))[name = string("layers_15_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_108_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_o_proj_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor var_6717_axes_0 = const()[name = string("op_6717_axes_0"), val = tensor([0])]; tensor var_6717_cast_fp16 = squeeze(axes = var_6717_axes_0, x = linear_108_cast_fp16)[name = string("op_6717_cast_fp16")]; tensor var_6719_axes_0 = const()[name = string("op_6719_axes_0"), val = tensor([0])]; tensor var_6719_cast_fp16 = squeeze(axes = var_6719_axes_0, x = var_6717_cast_fp16)[name = string("op_6719_cast_fp16")]; tensor var_6721_axes_0 = const()[name = string("op_6721_axes_0"), val = tensor([-1])]; tensor var_6721_cast_fp16 = expand_dims(axes = var_6721_axes_0, x = var_6719_cast_fp16)[name = string("op_6721_cast_fp16")]; tensor attn_4d_31_axes_0 = const()[name = string("attn_4d_31_axes_0"), val = tensor([-1])]; tensor attn_4d_31_cast_fp16 = expand_dims(axes = attn_4d_31_axes_0, x = var_6721_cast_fp16)[name = string("attn_4d_31_cast_fp16")]; tensor hidden_61_cast_fp16 = add(x = hidden_59_cast_fp16, y = attn_4d_31_cast_fp16)[name = string("hidden_61_cast_fp16")]; tensor var_6727_axes_0 = const()[name = string("op_6727_axes_0"), val = tensor([-1])]; tensor var_6727_cast_fp16 = squeeze(axes = var_6727_axes_0, x = hidden_61_cast_fp16)[name = string("op_6727_cast_fp16")]; tensor var_6729_axes_0 = const()[name = string("op_6729_axes_0"), val = tensor([-1])]; tensor var_6729_cast_fp16 = squeeze(axes = var_6729_axes_0, x = var_6727_cast_fp16)[name = string("op_6729_cast_fp16")]; tensor hidden_states_379_axes_0 = const()[name = string("hidden_states_379_axes_0"), val = tensor([0])]; tensor hidden_states_379_cast_fp16 = expand_dims(axes = hidden_states_379_axes_0, x = var_6729_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; fp16 var_6735_promoted_to_fp16 = const()[name = string("op_6735_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6741_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_6735_promoted_to_fp16)[name = string("op_6741_cast_fp16")]; tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([-1])]; bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; tensor variance_127_cast_fp16 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = var_6741_cast_fp16)[name = string("variance_127_cast_fp16")]; fp16 var_6744_to_fp16 = const()[name = string("op_6744_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6745_cast_fp16 = add(x = variance_127_cast_fp16, y = var_6744_to_fp16)[name = string("op_6745_cast_fp16")]; fp32 var_6746_epsilon_0 = const()[name = string("op_6746_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6746_cast_fp16 = rsqrt(epsilon = var_6746_epsilon_0, x = var_6745_cast_fp16)[name = string("op_6746_cast_fp16")]; tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_6746_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242379264)))]; tensor input_155_cast_fp16 = mul(x = const_160_to_fp16, y = hidden_states_383_cast_fp16)[name = string("input_155_cast_fp16")]; tensor layers_15_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242381376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245527168))))[name = string("layers_15_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_gate_proj_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor var_6756_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_6756_cast_fp16")]; tensor layers_15_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248673536))))[name = string("layers_15_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_up_proj_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("linear_110_cast_fp16")]; tensor input_159_cast_fp16 = mul(x = var_6756_cast_fp16, y = linear_110_cast_fp16)[name = string("input_159_cast_fp16")]; tensor layers_15_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248674112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251819904))))[name = string("layers_15_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_111_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_mlp_down_proj_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor var_6763_axes_0 = const()[name = string("op_6763_axes_0"), val = tensor([0])]; tensor var_6763_cast_fp16 = squeeze(axes = var_6763_axes_0, x = linear_111_cast_fp16)[name = string("op_6763_cast_fp16")]; tensor var_6765_axes_0 = const()[name = string("op_6765_axes_0"), val = tensor([0])]; tensor var_6765_cast_fp16 = squeeze(axes = var_6765_axes_0, x = var_6763_cast_fp16)[name = string("op_6765_cast_fp16")]; tensor var_6767_axes_0 = const()[name = string("op_6767_axes_0"), val = tensor([-1])]; tensor var_6767_cast_fp16 = expand_dims(axes = var_6767_axes_0, x = var_6765_cast_fp16)[name = string("op_6767_cast_fp16")]; tensor mlp_4d_31_axes_0 = const()[name = string("mlp_4d_31_axes_0"), val = tensor([-1])]; tensor mlp_4d_31_cast_fp16 = expand_dims(axes = mlp_4d_31_axes_0, x = var_6767_cast_fp16)[name = string("mlp_4d_31_cast_fp16")]; tensor hidden_63_cast_fp16 = add(x = hidden_61_cast_fp16, y = mlp_4d_31_cast_fp16)[name = string("hidden_63_cast_fp16")]; tensor var_6781_begin_0 = const()[name = string("op_6781_begin_0"), val = tensor([0, 16384, 0, 0])]; tensor var_6781_end_0 = const()[name = string("op_6781_end_0"), val = tensor([1, 17408, 1, 256])]; tensor var_6781_end_mask_0 = const()[name = string("op_6781_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6781_cast_fp16 = slice_by_index(begin = var_6781_begin_0, end = var_6781_end_0, end_mask = var_6781_end_mask_0, x = key_cache)[name = string("op_6781_cast_fp16")]; tensor var_6801_begin_0 = const()[name = string("op_6801_begin_0"), val = tensor([0, 16384, 0, 0])]; tensor var_6801_end_0 = const()[name = string("op_6801_end_0"), val = tensor([1, 17408, 1, 256])]; tensor var_6801_end_mask_0 = const()[name = string("op_6801_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6801_cast_fp16 = slice_by_index(begin = var_6801_begin_0, end = var_6801_end_0, end_mask = var_6801_end_mask_0, x = value_cache)[name = string("op_6801_cast_fp16")]; tensor var_6813_axes_0 = const()[name = string("op_6813_axes_0"), val = tensor([-1])]; tensor var_6813_cast_fp16 = squeeze(axes = var_6813_axes_0, x = hidden_63_cast_fp16)[name = string("op_6813_cast_fp16")]; tensor var_6815_axes_0 = const()[name = string("op_6815_axes_0"), val = tensor([-1])]; tensor var_6815_cast_fp16 = squeeze(axes = var_6815_axes_0, x = var_6813_cast_fp16)[name = string("op_6815_cast_fp16")]; tensor hidden_states_385_axes_0 = const()[name = string("hidden_states_385_axes_0"), val = tensor([0])]; tensor hidden_states_385_cast_fp16 = expand_dims(axes = hidden_states_385_axes_0, x = var_6815_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; fp16 var_6821_promoted_to_fp16 = const()[name = string("op_6821_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6827_cast_fp16 = pow(x = hidden_states_385_cast_fp16, y = var_6821_promoted_to_fp16)[name = string("op_6827_cast_fp16")]; tensor variance_129_axes_0 = const()[name = string("variance_129_axes_0"), val = tensor([-1])]; bool variance_129_keep_dims_0 = const()[name = string("variance_129_keep_dims_0"), val = bool(true)]; tensor variance_129_cast_fp16 = reduce_mean(axes = variance_129_axes_0, keep_dims = variance_129_keep_dims_0, x = var_6827_cast_fp16)[name = string("variance_129_cast_fp16")]; fp16 var_6830_to_fp16 = const()[name = string("op_6830_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6831_cast_fp16 = add(x = variance_129_cast_fp16, y = var_6830_to_fp16)[name = string("op_6831_cast_fp16")]; fp32 var_6832_epsilon_0 = const()[name = string("op_6832_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6832_cast_fp16 = rsqrt(epsilon = var_6832_epsilon_0, x = var_6831_cast_fp16)[name = string("op_6832_cast_fp16")]; tensor hidden_states_389_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = var_6832_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251820480)))]; tensor input_161_cast_fp16 = mul(x = const_161_to_fp16, y = hidden_states_389_cast_fp16)[name = string("input_161_cast_fp16")]; tensor layers_16_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253919808))))[name = string("layers_16_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_16_self_attn_q_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor layers_16_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253920384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254969024))))[name = string("layers_16_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_k_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor layers_16_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254969600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256018240))))[name = string("layers_16_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_v_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor var_6849 = const()[name = string("op_6849"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_391_cast_fp16 = reshape(shape = var_6849, x = linear_112_cast_fp16)[name = string("hidden_states_391_cast_fp16")]; tensor var_6855 = const()[name = string("op_6855"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_397_cast_fp16 = reshape(shape = var_6855, x = linear_113_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; tensor var_6861 = const()[name = string("op_6861"), val = tensor([1, 1, 8, 128])]; tensor v_99_cast_fp16 = reshape(shape = var_6861, x = linear_114_cast_fp16)[name = string("v_99_cast_fp16")]; fp16 var_6866_promoted_to_fp16 = const()[name = string("op_6866_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6872_cast_fp16 = pow(x = hidden_states_391_cast_fp16, y = var_6866_promoted_to_fp16)[name = string("op_6872_cast_fp16")]; tensor variance_131_axes_0 = const()[name = string("variance_131_axes_0"), val = tensor([-1])]; bool variance_131_keep_dims_0 = const()[name = string("variance_131_keep_dims_0"), val = bool(true)]; tensor variance_131_cast_fp16 = reduce_mean(axes = variance_131_axes_0, keep_dims = variance_131_keep_dims_0, x = var_6872_cast_fp16)[name = string("variance_131_cast_fp16")]; fp16 var_6875_to_fp16 = const()[name = string("op_6875_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6876_cast_fp16 = add(x = variance_131_cast_fp16, y = var_6875_to_fp16)[name = string("op_6876_cast_fp16")]; fp32 var_6877_epsilon_0 = const()[name = string("op_6877_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6877_cast_fp16 = rsqrt(epsilon = var_6877_epsilon_0, x = var_6876_cast_fp16)[name = string("op_6877_cast_fp16")]; tensor hidden_states_395_cast_fp16 = mul(x = hidden_states_391_cast_fp16, y = var_6877_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256018816)))]; tensor q_131_cast_fp16 = mul(x = const_162_to_fp16, y = hidden_states_395_cast_fp16)[name = string("q_131_cast_fp16")]; fp16 var_6884_promoted_to_fp16 = const()[name = string("op_6884_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6890_cast_fp16 = pow(x = hidden_states_397_cast_fp16, y = var_6884_promoted_to_fp16)[name = string("op_6890_cast_fp16")]; tensor variance_133_axes_0 = const()[name = string("variance_133_axes_0"), val = tensor([-1])]; bool variance_133_keep_dims_0 = const()[name = string("variance_133_keep_dims_0"), val = bool(true)]; tensor variance_133_cast_fp16 = reduce_mean(axes = variance_133_axes_0, keep_dims = variance_133_keep_dims_0, x = var_6890_cast_fp16)[name = string("variance_133_cast_fp16")]; fp16 var_6893_to_fp16 = const()[name = string("op_6893_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6894_cast_fp16 = add(x = variance_133_cast_fp16, y = var_6893_to_fp16)[name = string("op_6894_cast_fp16")]; fp32 var_6895_epsilon_0 = const()[name = string("op_6895_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6895_cast_fp16 = rsqrt(epsilon = var_6895_epsilon_0, x = var_6894_cast_fp16)[name = string("op_6895_cast_fp16")]; tensor hidden_states_401_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = var_6895_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256019136)))]; tensor k_131_cast_fp16 = mul(x = const_163_to_fp16, y = hidden_states_401_cast_fp16)[name = string("k_131_cast_fp16")]; tensor q_133_perm_0 = const()[name = string("q_133_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_133_perm_0 = const()[name = string("k_133_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_101_perm_0 = const()[name = string("v_101_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_133_cast_fp16 = transpose(perm = q_133_perm_0, x = q_131_cast_fp16)[name = string("transpose_47")]; tensor var_6912_cast_fp16 = mul(x = q_133_cast_fp16, y = cos_3_cast_fp16)[name = string("op_6912_cast_fp16")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_133_cast_fp16)[name = string("x1_65_cast_fp16")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_133_cast_fp16)[name = string("x2_65_cast_fp16")]; fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6933_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_166_promoted_to_fp16)[name = string("op_6933_cast_fp16")]; int32 var_6935 = const()[name = string("op_6935"), val = int32(-1)]; bool var_6936_interleave_0 = const()[name = string("op_6936_interleave_0"), val = bool(false)]; tensor var_6936_cast_fp16 = concat(axis = var_6935, interleave = var_6936_interleave_0, values = (var_6933_cast_fp16, x1_65_cast_fp16))[name = string("op_6936_cast_fp16")]; tensor var_6937_cast_fp16 = mul(x = var_6936_cast_fp16, y = sin_3_cast_fp16)[name = string("op_6937_cast_fp16")]; tensor q_135_cast_fp16 = add(x = var_6912_cast_fp16, y = var_6937_cast_fp16)[name = string("q_135_cast_fp16")]; tensor k_133_cast_fp16 = transpose(perm = k_133_perm_0, x = k_131_cast_fp16)[name = string("transpose_46")]; tensor var_6940_cast_fp16 = mul(x = k_133_cast_fp16, y = cos_3_cast_fp16)[name = string("op_6940_cast_fp16")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_133_cast_fp16)[name = string("x1_67_cast_fp16")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_133_cast_fp16)[name = string("x2_67_cast_fp16")]; fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6961_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_6961_cast_fp16")]; int32 var_6963 = const()[name = string("op_6963"), val = int32(-1)]; bool var_6964_interleave_0 = const()[name = string("op_6964_interleave_0"), val = bool(false)]; tensor var_6964_cast_fp16 = concat(axis = var_6963, interleave = var_6964_interleave_0, values = (var_6961_cast_fp16, x1_67_cast_fp16))[name = string("op_6964_cast_fp16")]; tensor var_6965_cast_fp16 = mul(x = var_6964_cast_fp16, y = sin_3_cast_fp16)[name = string("op_6965_cast_fp16")]; tensor k_135_cast_fp16 = add(x = var_6940_cast_fp16, y = var_6965_cast_fp16)[name = string("k_135_cast_fp16")]; tensor var_6972 = const()[name = string("op_6972"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_33_cast_fp16 = reshape(shape = var_6972, x = k_135_cast_fp16)[name = string("nk_flat_33_cast_fp16")]; tensor var_6978 = const()[name = string("op_6978"), val = tensor([1, 1024, 1, 1])]; tensor v_101_cast_fp16 = transpose(perm = v_101_perm_0, x = v_99_cast_fp16)[name = string("transpose_45")]; tensor nv_flat_33_cast_fp16 = reshape(shape = var_6978, x = v_101_cast_fp16)[name = string("nv_flat_33_cast_fp16")]; tensor var_6987_cast_fp16 = mul(x = var_6781_cast_fp16, y = var_1194_cast_fp16)[name = string("op_6987_cast_fp16")]; tensor var_6988_cast_fp16 = mul(x = nk_flat_33_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_6988_cast_fp16")]; tensor key_cache_69_cast_fp16 = add(x = var_6987_cast_fp16, y = var_6988_cast_fp16)[name = string("key_cache_69_cast_fp16")]; tensor var_6994_cast_fp16 = mul(x = var_6801_cast_fp16, y = var_1194_cast_fp16)[name = string("op_6994_cast_fp16")]; tensor var_6995_cast_fp16 = mul(x = nv_flat_33_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_6995_cast_fp16")]; tensor value_cache_69_cast_fp16 = add(x = var_6994_cast_fp16, y = var_6995_cast_fp16)[name = string("value_cache_69_cast_fp16")]; tensor kc_97_axes_0 = const()[name = string("kc_97_axes_0"), val = tensor([2])]; tensor kc_97_cast_fp16 = squeeze(axes = kc_97_axes_0, x = key_cache_69_cast_fp16)[name = string("kc_97_cast_fp16")]; tensor var_7004 = const()[name = string("op_7004"), val = tensor([1, 8, 128, 256])]; tensor kc_99_cast_fp16 = reshape(shape = var_7004, x = kc_97_cast_fp16)[name = string("kc_99_cast_fp16")]; tensor vc_97_axes_0 = const()[name = string("vc_97_axes_0"), val = tensor([2])]; tensor vc_97_cast_fp16 = squeeze(axes = vc_97_axes_0, x = value_cache_69_cast_fp16)[name = string("vc_97_cast_fp16")]; tensor var_7012 = const()[name = string("op_7012"), val = tensor([1, 8, 128, 256])]; tensor vc_99_cast_fp16 = reshape(shape = var_7012, x = vc_97_cast_fp16)[name = string("vc_99_cast_fp16")]; tensor var_7015_axes_0 = const()[name = string("op_7015_axes_0"), val = tensor([2])]; tensor var_7015_cast_fp16 = expand_dims(axes = var_7015_axes_0, x = kc_99_cast_fp16)[name = string("op_7015_cast_fp16")]; tensor var_7023_reps_0 = const()[name = string("op_7023_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7023_cast_fp16 = tile(reps = var_7023_reps_0, x = var_7015_cast_fp16)[name = string("op_7023_cast_fp16")]; tensor var_7028 = const()[name = string("op_7028"), val = tensor([1, 16, 128, 256])]; tensor kc_101_cast_fp16 = reshape(shape = var_7028, x = var_7023_cast_fp16)[name = string("kc_101_cast_fp16")]; tensor var_7031_axes_0 = const()[name = string("op_7031_axes_0"), val = tensor([2])]; tensor var_7031_cast_fp16 = expand_dims(axes = var_7031_axes_0, x = vc_99_cast_fp16)[name = string("op_7031_cast_fp16")]; tensor var_7039_reps_0 = const()[name = string("op_7039_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7039_cast_fp16 = tile(reps = var_7039_reps_0, x = var_7031_cast_fp16)[name = string("op_7039_cast_fp16")]; tensor var_7044 = const()[name = string("op_7044"), val = tensor([1, 16, 128, 256])]; tensor vc_101_cast_fp16 = reshape(shape = var_7044, x = var_7039_cast_fp16)[name = string("vc_101_cast_fp16")]; bool var_7046_transpose_x_0 = const()[name = string("op_7046_transpose_x_0"), val = bool(false)]; bool var_7046_transpose_y_0 = const()[name = string("op_7046_transpose_y_0"), val = bool(false)]; tensor var_7046_cast_fp16 = matmul(transpose_x = var_7046_transpose_x_0, transpose_y = var_7046_transpose_y_0, x = q_135_cast_fp16, y = kc_101_cast_fp16)[name = string("op_7046_cast_fp16")]; fp16 _inversed_attn_weights_129_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_129_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_129_cast_fp16 = mul(x = var_7046_cast_fp16, y = _inversed_attn_weights_129_y_0_to_fp16)[name = string("_inversed_attn_weights_129_cast_fp16")]; tensor attn_weights_131_cast_fp16 = add(x = _inversed_attn_weights_129_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; int32 var_7060 = const()[name = string("op_7060"), val = int32(-1)]; tensor attn_weights_135_cast_fp16 = softmax(axis = var_7060, x = attn_weights_131_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; bool attn_output_65_transpose_x_1 = const()[name = string("attn_output_65_transpose_x_1"), val = bool(false)]; bool attn_output_65_transpose_y_1 = const()[name = string("attn_output_65_transpose_y_1"), val = bool(true)]; tensor attn_output_65_cast_fp16 = matmul(transpose_x = attn_output_65_transpose_x_1, transpose_y = attn_output_65_transpose_y_1, x = attn_weights_135_cast_fp16, y = vc_101_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_7069_perm_0 = const()[name = string("op_7069_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7073 = const()[name = string("op_7073"), val = tensor([1, 1, -1])]; tensor var_7069_cast_fp16 = transpose(perm = var_7069_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_44")]; tensor input_163_cast_fp16 = reshape(shape = var_7073, x = var_7069_cast_fp16)[name = string("input_163_cast_fp16")]; tensor layers_16_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256019456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258116672))))[name = string("layers_16_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_o_proj_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor var_7079_axes_0 = const()[name = string("op_7079_axes_0"), val = tensor([0])]; tensor var_7079_cast_fp16 = squeeze(axes = var_7079_axes_0, x = linear_115_cast_fp16)[name = string("op_7079_cast_fp16")]; tensor var_7081_axes_0 = const()[name = string("op_7081_axes_0"), val = tensor([0])]; tensor var_7081_cast_fp16 = squeeze(axes = var_7081_axes_0, x = var_7079_cast_fp16)[name = string("op_7081_cast_fp16")]; tensor var_7083_axes_0 = const()[name = string("op_7083_axes_0"), val = tensor([-1])]; tensor var_7083_cast_fp16 = expand_dims(axes = var_7083_axes_0, x = var_7081_cast_fp16)[name = string("op_7083_cast_fp16")]; tensor attn_4d_33_axes_0 = const()[name = string("attn_4d_33_axes_0"), val = tensor([-1])]; tensor attn_4d_33_cast_fp16 = expand_dims(axes = attn_4d_33_axes_0, x = var_7083_cast_fp16)[name = string("attn_4d_33_cast_fp16")]; tensor hidden_65_cast_fp16 = add(x = hidden_63_cast_fp16, y = attn_4d_33_cast_fp16)[name = string("hidden_65_cast_fp16")]; tensor var_7089_axes_0 = const()[name = string("op_7089_axes_0"), val = tensor([-1])]; tensor var_7089_cast_fp16 = squeeze(axes = var_7089_axes_0, x = hidden_65_cast_fp16)[name = string("op_7089_cast_fp16")]; tensor var_7091_axes_0 = const()[name = string("op_7091_axes_0"), val = tensor([-1])]; tensor var_7091_cast_fp16 = squeeze(axes = var_7091_axes_0, x = var_7089_cast_fp16)[name = string("op_7091_cast_fp16")]; tensor hidden_states_403_axes_0 = const()[name = string("hidden_states_403_axes_0"), val = tensor([0])]; tensor hidden_states_403_cast_fp16 = expand_dims(axes = hidden_states_403_axes_0, x = var_7091_cast_fp16)[name = string("hidden_states_403_cast_fp16")]; fp16 var_7097_promoted_to_fp16 = const()[name = string("op_7097_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7103_cast_fp16 = pow(x = hidden_states_403_cast_fp16, y = var_7097_promoted_to_fp16)[name = string("op_7103_cast_fp16")]; tensor variance_135_axes_0 = const()[name = string("variance_135_axes_0"), val = tensor([-1])]; bool variance_135_keep_dims_0 = const()[name = string("variance_135_keep_dims_0"), val = bool(true)]; tensor variance_135_cast_fp16 = reduce_mean(axes = variance_135_axes_0, keep_dims = variance_135_keep_dims_0, x = var_7103_cast_fp16)[name = string("variance_135_cast_fp16")]; fp16 var_7106_to_fp16 = const()[name = string("op_7106_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7107_cast_fp16 = add(x = variance_135_cast_fp16, y = var_7106_to_fp16)[name = string("op_7107_cast_fp16")]; fp32 var_7108_epsilon_0 = const()[name = string("op_7108_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7108_cast_fp16 = rsqrt(epsilon = var_7108_epsilon_0, x = var_7107_cast_fp16)[name = string("op_7108_cast_fp16")]; tensor hidden_states_407_cast_fp16 = mul(x = hidden_states_403_cast_fp16, y = var_7108_cast_fp16)[name = string("hidden_states_407_cast_fp16")]; tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258117248)))]; tensor input_165_cast_fp16 = mul(x = const_170_to_fp16, y = hidden_states_407_cast_fp16)[name = string("input_165_cast_fp16")]; tensor layers_16_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258119360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261265152))))[name = string("layers_16_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_gate_proj_weight_to_fp16_palettized, x = input_165_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor var_7118_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_7118_cast_fp16")]; tensor layers_16_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261265728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264411520))))[name = string("layers_16_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_up_proj_weight_to_fp16_palettized, x = input_165_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor input_169_cast_fp16 = mul(x = var_7118_cast_fp16, y = linear_117_cast_fp16)[name = string("input_169_cast_fp16")]; tensor layers_16_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264412096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267557888))))[name = string("layers_16_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_mlp_down_proj_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = string("linear_118_cast_fp16")]; tensor var_7125_axes_0 = const()[name = string("op_7125_axes_0"), val = tensor([0])]; tensor var_7125_cast_fp16 = squeeze(axes = var_7125_axes_0, x = linear_118_cast_fp16)[name = string("op_7125_cast_fp16")]; tensor var_7127_axes_0 = const()[name = string("op_7127_axes_0"), val = tensor([0])]; tensor var_7127_cast_fp16 = squeeze(axes = var_7127_axes_0, x = var_7125_cast_fp16)[name = string("op_7127_cast_fp16")]; tensor var_7129_axes_0 = const()[name = string("op_7129_axes_0"), val = tensor([-1])]; tensor var_7129_cast_fp16 = expand_dims(axes = var_7129_axes_0, x = var_7127_cast_fp16)[name = string("op_7129_cast_fp16")]; tensor mlp_4d_33_axes_0 = const()[name = string("mlp_4d_33_axes_0"), val = tensor([-1])]; tensor mlp_4d_33_cast_fp16 = expand_dims(axes = mlp_4d_33_axes_0, x = var_7129_cast_fp16)[name = string("mlp_4d_33_cast_fp16")]; tensor hidden_67_cast_fp16 = add(x = hidden_65_cast_fp16, y = mlp_4d_33_cast_fp16)[name = string("hidden_67_cast_fp16")]; tensor var_7143_begin_0 = const()[name = string("op_7143_begin_0"), val = tensor([0, 17408, 0, 0])]; tensor var_7143_end_0 = const()[name = string("op_7143_end_0"), val = tensor([1, 18432, 1, 256])]; tensor var_7143_end_mask_0 = const()[name = string("op_7143_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7143_cast_fp16 = slice_by_index(begin = var_7143_begin_0, end = var_7143_end_0, end_mask = var_7143_end_mask_0, x = key_cache)[name = string("op_7143_cast_fp16")]; tensor var_7163_begin_0 = const()[name = string("op_7163_begin_0"), val = tensor([0, 17408, 0, 0])]; tensor var_7163_end_0 = const()[name = string("op_7163_end_0"), val = tensor([1, 18432, 1, 256])]; tensor var_7163_end_mask_0 = const()[name = string("op_7163_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7163_cast_fp16 = slice_by_index(begin = var_7163_begin_0, end = var_7163_end_0, end_mask = var_7163_end_mask_0, x = value_cache)[name = string("op_7163_cast_fp16")]; tensor var_7175_axes_0 = const()[name = string("op_7175_axes_0"), val = tensor([-1])]; tensor var_7175_cast_fp16 = squeeze(axes = var_7175_axes_0, x = hidden_67_cast_fp16)[name = string("op_7175_cast_fp16")]; tensor var_7177_axes_0 = const()[name = string("op_7177_axes_0"), val = tensor([-1])]; tensor var_7177_cast_fp16 = squeeze(axes = var_7177_axes_0, x = var_7175_cast_fp16)[name = string("op_7177_cast_fp16")]; tensor hidden_states_409_axes_0 = const()[name = string("hidden_states_409_axes_0"), val = tensor([0])]; tensor hidden_states_409_cast_fp16 = expand_dims(axes = hidden_states_409_axes_0, x = var_7177_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; fp16 var_7183_promoted_to_fp16 = const()[name = string("op_7183_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7189_cast_fp16 = pow(x = hidden_states_409_cast_fp16, y = var_7183_promoted_to_fp16)[name = string("op_7189_cast_fp16")]; tensor variance_137_axes_0 = const()[name = string("variance_137_axes_0"), val = tensor([-1])]; bool variance_137_keep_dims_0 = const()[name = string("variance_137_keep_dims_0"), val = bool(true)]; tensor variance_137_cast_fp16 = reduce_mean(axes = variance_137_axes_0, keep_dims = variance_137_keep_dims_0, x = var_7189_cast_fp16)[name = string("variance_137_cast_fp16")]; fp16 var_7192_to_fp16 = const()[name = string("op_7192_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7193_cast_fp16 = add(x = variance_137_cast_fp16, y = var_7192_to_fp16)[name = string("op_7193_cast_fp16")]; fp32 var_7194_epsilon_0 = const()[name = string("op_7194_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7194_cast_fp16 = rsqrt(epsilon = var_7194_epsilon_0, x = var_7193_cast_fp16)[name = string("op_7194_cast_fp16")]; tensor hidden_states_413_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = var_7194_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267558464)))]; tensor input_171_cast_fp16 = mul(x = const_171_to_fp16, y = hidden_states_413_cast_fp16)[name = string("input_171_cast_fp16")]; tensor layers_17_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267560576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269657792))))[name = string("layers_17_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_17_self_attn_q_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor layers_17_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269658368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270707008))))[name = string("layers_17_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_k_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor layers_17_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270707584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271756224))))[name = string("layers_17_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_v_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor var_7211 = const()[name = string("op_7211"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_415_cast_fp16 = reshape(shape = var_7211, x = linear_119_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; tensor var_7217 = const()[name = string("op_7217"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_421_cast_fp16 = reshape(shape = var_7217, x = linear_120_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; tensor var_7223 = const()[name = string("op_7223"), val = tensor([1, 1, 8, 128])]; tensor v_105_cast_fp16 = reshape(shape = var_7223, x = linear_121_cast_fp16)[name = string("v_105_cast_fp16")]; fp16 var_7228_promoted_to_fp16 = const()[name = string("op_7228_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7234_cast_fp16 = pow(x = hidden_states_415_cast_fp16, y = var_7228_promoted_to_fp16)[name = string("op_7234_cast_fp16")]; tensor variance_139_axes_0 = const()[name = string("variance_139_axes_0"), val = tensor([-1])]; bool variance_139_keep_dims_0 = const()[name = string("variance_139_keep_dims_0"), val = bool(true)]; tensor variance_139_cast_fp16 = reduce_mean(axes = variance_139_axes_0, keep_dims = variance_139_keep_dims_0, x = var_7234_cast_fp16)[name = string("variance_139_cast_fp16")]; fp16 var_7237_to_fp16 = const()[name = string("op_7237_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7238_cast_fp16 = add(x = variance_139_cast_fp16, y = var_7237_to_fp16)[name = string("op_7238_cast_fp16")]; fp32 var_7239_epsilon_0 = const()[name = string("op_7239_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7239_cast_fp16 = rsqrt(epsilon = var_7239_epsilon_0, x = var_7238_cast_fp16)[name = string("op_7239_cast_fp16")]; tensor hidden_states_419_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = var_7239_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271756800)))]; tensor q_139_cast_fp16 = mul(x = const_172_to_fp16, y = hidden_states_419_cast_fp16)[name = string("q_139_cast_fp16")]; fp16 var_7246_promoted_to_fp16 = const()[name = string("op_7246_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7252_cast_fp16 = pow(x = hidden_states_421_cast_fp16, y = var_7246_promoted_to_fp16)[name = string("op_7252_cast_fp16")]; tensor variance_141_axes_0 = const()[name = string("variance_141_axes_0"), val = tensor([-1])]; bool variance_141_keep_dims_0 = const()[name = string("variance_141_keep_dims_0"), val = bool(true)]; tensor variance_141_cast_fp16 = reduce_mean(axes = variance_141_axes_0, keep_dims = variance_141_keep_dims_0, x = var_7252_cast_fp16)[name = string("variance_141_cast_fp16")]; fp16 var_7255_to_fp16 = const()[name = string("op_7255_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7256_cast_fp16 = add(x = variance_141_cast_fp16, y = var_7255_to_fp16)[name = string("op_7256_cast_fp16")]; fp32 var_7257_epsilon_0 = const()[name = string("op_7257_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7257_cast_fp16 = rsqrt(epsilon = var_7257_epsilon_0, x = var_7256_cast_fp16)[name = string("op_7257_cast_fp16")]; tensor hidden_states_425_cast_fp16 = mul(x = hidden_states_421_cast_fp16, y = var_7257_cast_fp16)[name = string("hidden_states_425_cast_fp16")]; tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271757120)))]; tensor k_139_cast_fp16 = mul(x = const_173_to_fp16, y = hidden_states_425_cast_fp16)[name = string("k_139_cast_fp16")]; tensor q_141_perm_0 = const()[name = string("q_141_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_141_perm_0 = const()[name = string("k_141_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_107_perm_0 = const()[name = string("v_107_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_141_cast_fp16 = transpose(perm = q_141_perm_0, x = q_139_cast_fp16)[name = string("transpose_43")]; tensor var_7274_cast_fp16 = mul(x = q_141_cast_fp16, y = cos_3_cast_fp16)[name = string("op_7274_cast_fp16")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_141_cast_fp16)[name = string("x1_69_cast_fp16")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_141_cast_fp16)[name = string("x2_69_cast_fp16")]; fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7295_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_176_promoted_to_fp16)[name = string("op_7295_cast_fp16")]; int32 var_7297 = const()[name = string("op_7297"), val = int32(-1)]; bool var_7298_interleave_0 = const()[name = string("op_7298_interleave_0"), val = bool(false)]; tensor var_7298_cast_fp16 = concat(axis = var_7297, interleave = var_7298_interleave_0, values = (var_7295_cast_fp16, x1_69_cast_fp16))[name = string("op_7298_cast_fp16")]; tensor var_7299_cast_fp16 = mul(x = var_7298_cast_fp16, y = sin_3_cast_fp16)[name = string("op_7299_cast_fp16")]; tensor q_143_cast_fp16 = add(x = var_7274_cast_fp16, y = var_7299_cast_fp16)[name = string("q_143_cast_fp16")]; tensor k_141_cast_fp16 = transpose(perm = k_141_perm_0, x = k_139_cast_fp16)[name = string("transpose_42")]; tensor var_7302_cast_fp16 = mul(x = k_141_cast_fp16, y = cos_3_cast_fp16)[name = string("op_7302_cast_fp16")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_141_cast_fp16)[name = string("x1_71_cast_fp16")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_141_cast_fp16)[name = string("x2_71_cast_fp16")]; fp16 const_179_promoted_to_fp16 = const()[name = string("const_179_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7323_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_179_promoted_to_fp16)[name = string("op_7323_cast_fp16")]; int32 var_7325 = const()[name = string("op_7325"), val = int32(-1)]; bool var_7326_interleave_0 = const()[name = string("op_7326_interleave_0"), val = bool(false)]; tensor var_7326_cast_fp16 = concat(axis = var_7325, interleave = var_7326_interleave_0, values = (var_7323_cast_fp16, x1_71_cast_fp16))[name = string("op_7326_cast_fp16")]; tensor var_7327_cast_fp16 = mul(x = var_7326_cast_fp16, y = sin_3_cast_fp16)[name = string("op_7327_cast_fp16")]; tensor k_143_cast_fp16 = add(x = var_7302_cast_fp16, y = var_7327_cast_fp16)[name = string("k_143_cast_fp16")]; tensor var_7334 = const()[name = string("op_7334"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_35_cast_fp16 = reshape(shape = var_7334, x = k_143_cast_fp16)[name = string("nk_flat_35_cast_fp16")]; tensor var_7340 = const()[name = string("op_7340"), val = tensor([1, 1024, 1, 1])]; tensor v_107_cast_fp16 = transpose(perm = v_107_perm_0, x = v_105_cast_fp16)[name = string("transpose_41")]; tensor nv_flat_35_cast_fp16 = reshape(shape = var_7340, x = v_107_cast_fp16)[name = string("nv_flat_35_cast_fp16")]; tensor var_7349_cast_fp16 = mul(x = var_7143_cast_fp16, y = var_1194_cast_fp16)[name = string("op_7349_cast_fp16")]; tensor var_7350_cast_fp16 = mul(x = nk_flat_35_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_7350_cast_fp16")]; tensor key_cache_73_cast_fp16 = add(x = var_7349_cast_fp16, y = var_7350_cast_fp16)[name = string("key_cache_73_cast_fp16")]; tensor var_7356_cast_fp16 = mul(x = var_7163_cast_fp16, y = var_1194_cast_fp16)[name = string("op_7356_cast_fp16")]; tensor var_7357_cast_fp16 = mul(x = nv_flat_35_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_7357_cast_fp16")]; tensor value_cache_73_cast_fp16 = add(x = var_7356_cast_fp16, y = var_7357_cast_fp16)[name = string("value_cache_73_cast_fp16")]; tensor kc_103_axes_0 = const()[name = string("kc_103_axes_0"), val = tensor([2])]; tensor kc_103_cast_fp16 = squeeze(axes = kc_103_axes_0, x = key_cache_73_cast_fp16)[name = string("kc_103_cast_fp16")]; tensor var_7366 = const()[name = string("op_7366"), val = tensor([1, 8, 128, 256])]; tensor kc_105_cast_fp16 = reshape(shape = var_7366, x = kc_103_cast_fp16)[name = string("kc_105_cast_fp16")]; tensor vc_103_axes_0 = const()[name = string("vc_103_axes_0"), val = tensor([2])]; tensor vc_103_cast_fp16 = squeeze(axes = vc_103_axes_0, x = value_cache_73_cast_fp16)[name = string("vc_103_cast_fp16")]; tensor var_7374 = const()[name = string("op_7374"), val = tensor([1, 8, 128, 256])]; tensor vc_105_cast_fp16 = reshape(shape = var_7374, x = vc_103_cast_fp16)[name = string("vc_105_cast_fp16")]; tensor var_7377_axes_0 = const()[name = string("op_7377_axes_0"), val = tensor([2])]; tensor var_7377_cast_fp16 = expand_dims(axes = var_7377_axes_0, x = kc_105_cast_fp16)[name = string("op_7377_cast_fp16")]; tensor var_7385_reps_0 = const()[name = string("op_7385_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7385_cast_fp16 = tile(reps = var_7385_reps_0, x = var_7377_cast_fp16)[name = string("op_7385_cast_fp16")]; tensor var_7390 = const()[name = string("op_7390"), val = tensor([1, 16, 128, 256])]; tensor kc_107_cast_fp16 = reshape(shape = var_7390, x = var_7385_cast_fp16)[name = string("kc_107_cast_fp16")]; tensor var_7393_axes_0 = const()[name = string("op_7393_axes_0"), val = tensor([2])]; tensor var_7393_cast_fp16 = expand_dims(axes = var_7393_axes_0, x = vc_105_cast_fp16)[name = string("op_7393_cast_fp16")]; tensor var_7401_reps_0 = const()[name = string("op_7401_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7401_cast_fp16 = tile(reps = var_7401_reps_0, x = var_7393_cast_fp16)[name = string("op_7401_cast_fp16")]; tensor var_7406 = const()[name = string("op_7406"), val = tensor([1, 16, 128, 256])]; tensor vc_107_cast_fp16 = reshape(shape = var_7406, x = var_7401_cast_fp16)[name = string("vc_107_cast_fp16")]; bool var_7408_transpose_x_0 = const()[name = string("op_7408_transpose_x_0"), val = bool(false)]; bool var_7408_transpose_y_0 = const()[name = string("op_7408_transpose_y_0"), val = bool(false)]; tensor var_7408_cast_fp16 = matmul(transpose_x = var_7408_transpose_x_0, transpose_y = var_7408_transpose_y_0, x = q_143_cast_fp16, y = kc_107_cast_fp16)[name = string("op_7408_cast_fp16")]; fp16 _inversed_attn_weights_137_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_137_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_137_cast_fp16 = mul(x = var_7408_cast_fp16, y = _inversed_attn_weights_137_y_0_to_fp16)[name = string("_inversed_attn_weights_137_cast_fp16")]; tensor attn_weights_139_cast_fp16 = add(x = _inversed_attn_weights_137_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_139_cast_fp16")]; int32 var_7422 = const()[name = string("op_7422"), val = int32(-1)]; tensor attn_weights_143_cast_fp16 = softmax(axis = var_7422, x = attn_weights_139_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; bool attn_output_69_transpose_x_1 = const()[name = string("attn_output_69_transpose_x_1"), val = bool(false)]; bool attn_output_69_transpose_y_1 = const()[name = string("attn_output_69_transpose_y_1"), val = bool(true)]; tensor attn_output_69_cast_fp16 = matmul(transpose_x = attn_output_69_transpose_x_1, transpose_y = attn_output_69_transpose_y_1, x = attn_weights_143_cast_fp16, y = vc_107_cast_fp16)[name = string("attn_output_69_cast_fp16")]; tensor var_7431_perm_0 = const()[name = string("op_7431_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7435 = const()[name = string("op_7435"), val = tensor([1, 1, -1])]; tensor var_7431_cast_fp16 = transpose(perm = var_7431_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_40")]; tensor input_173_cast_fp16 = reshape(shape = var_7435, x = var_7431_cast_fp16)[name = string("input_173_cast_fp16")]; tensor layers_17_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271757440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273854656))))[name = string("layers_17_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_122_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_o_proj_weight_to_fp16_palettized, x = input_173_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor var_7441_axes_0 = const()[name = string("op_7441_axes_0"), val = tensor([0])]; tensor var_7441_cast_fp16 = squeeze(axes = var_7441_axes_0, x = linear_122_cast_fp16)[name = string("op_7441_cast_fp16")]; tensor var_7443_axes_0 = const()[name = string("op_7443_axes_0"), val = tensor([0])]; tensor var_7443_cast_fp16 = squeeze(axes = var_7443_axes_0, x = var_7441_cast_fp16)[name = string("op_7443_cast_fp16")]; tensor var_7445_axes_0 = const()[name = string("op_7445_axes_0"), val = tensor([-1])]; tensor var_7445_cast_fp16 = expand_dims(axes = var_7445_axes_0, x = var_7443_cast_fp16)[name = string("op_7445_cast_fp16")]; tensor attn_4d_35_axes_0 = const()[name = string("attn_4d_35_axes_0"), val = tensor([-1])]; tensor attn_4d_35_cast_fp16 = expand_dims(axes = attn_4d_35_axes_0, x = var_7445_cast_fp16)[name = string("attn_4d_35_cast_fp16")]; tensor hidden_69_cast_fp16 = add(x = hidden_67_cast_fp16, y = attn_4d_35_cast_fp16)[name = string("hidden_69_cast_fp16")]; tensor var_7451_axes_0 = const()[name = string("op_7451_axes_0"), val = tensor([-1])]; tensor var_7451_cast_fp16 = squeeze(axes = var_7451_axes_0, x = hidden_69_cast_fp16)[name = string("op_7451_cast_fp16")]; tensor var_7453_axes_0 = const()[name = string("op_7453_axes_0"), val = tensor([-1])]; tensor var_7453_cast_fp16 = squeeze(axes = var_7453_axes_0, x = var_7451_cast_fp16)[name = string("op_7453_cast_fp16")]; tensor hidden_states_427_axes_0 = const()[name = string("hidden_states_427_axes_0"), val = tensor([0])]; tensor hidden_states_427_cast_fp16 = expand_dims(axes = hidden_states_427_axes_0, x = var_7453_cast_fp16)[name = string("hidden_states_427_cast_fp16")]; fp16 var_7459_promoted_to_fp16 = const()[name = string("op_7459_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7465_cast_fp16 = pow(x = hidden_states_427_cast_fp16, y = var_7459_promoted_to_fp16)[name = string("op_7465_cast_fp16")]; tensor variance_143_axes_0 = const()[name = string("variance_143_axes_0"), val = tensor([-1])]; bool variance_143_keep_dims_0 = const()[name = string("variance_143_keep_dims_0"), val = bool(true)]; tensor variance_143_cast_fp16 = reduce_mean(axes = variance_143_axes_0, keep_dims = variance_143_keep_dims_0, x = var_7465_cast_fp16)[name = string("variance_143_cast_fp16")]; fp16 var_7468_to_fp16 = const()[name = string("op_7468_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7469_cast_fp16 = add(x = variance_143_cast_fp16, y = var_7468_to_fp16)[name = string("op_7469_cast_fp16")]; fp32 var_7470_epsilon_0 = const()[name = string("op_7470_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7470_cast_fp16 = rsqrt(epsilon = var_7470_epsilon_0, x = var_7469_cast_fp16)[name = string("op_7470_cast_fp16")]; tensor hidden_states_431_cast_fp16 = mul(x = hidden_states_427_cast_fp16, y = var_7470_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273855232)))]; tensor input_175_cast_fp16 = mul(x = const_180_to_fp16, y = hidden_states_431_cast_fp16)[name = string("input_175_cast_fp16")]; tensor layers_17_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273857344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277003136))))[name = string("layers_17_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_gate_proj_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor var_7480_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_7480_cast_fp16")]; tensor layers_17_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277003712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280149504))))[name = string("layers_17_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_up_proj_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor input_179_cast_fp16 = mul(x = var_7480_cast_fp16, y = linear_124_cast_fp16)[name = string("input_179_cast_fp16")]; tensor layers_17_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280150080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283295872))))[name = string("layers_17_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_mlp_down_proj_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor var_7487_axes_0 = const()[name = string("op_7487_axes_0"), val = tensor([0])]; tensor var_7487_cast_fp16 = squeeze(axes = var_7487_axes_0, x = linear_125_cast_fp16)[name = string("op_7487_cast_fp16")]; tensor var_7489_axes_0 = const()[name = string("op_7489_axes_0"), val = tensor([0])]; tensor var_7489_cast_fp16 = squeeze(axes = var_7489_axes_0, x = var_7487_cast_fp16)[name = string("op_7489_cast_fp16")]; tensor var_7491_axes_0 = const()[name = string("op_7491_axes_0"), val = tensor([-1])]; tensor var_7491_cast_fp16 = expand_dims(axes = var_7491_axes_0, x = var_7489_cast_fp16)[name = string("op_7491_cast_fp16")]; tensor mlp_4d_35_axes_0 = const()[name = string("mlp_4d_35_axes_0"), val = tensor([-1])]; tensor mlp_4d_35_cast_fp16 = expand_dims(axes = mlp_4d_35_axes_0, x = var_7491_cast_fp16)[name = string("mlp_4d_35_cast_fp16")]; tensor hidden_71_cast_fp16 = add(x = hidden_69_cast_fp16, y = mlp_4d_35_cast_fp16)[name = string("hidden_71_cast_fp16")]; tensor var_7505_begin_0 = const()[name = string("op_7505_begin_0"), val = tensor([0, 18432, 0, 0])]; tensor var_7505_end_0 = const()[name = string("op_7505_end_0"), val = tensor([1, 19456, 1, 256])]; tensor var_7505_end_mask_0 = const()[name = string("op_7505_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7505_cast_fp16 = slice_by_index(begin = var_7505_begin_0, end = var_7505_end_0, end_mask = var_7505_end_mask_0, x = key_cache)[name = string("op_7505_cast_fp16")]; tensor var_7525_begin_0 = const()[name = string("op_7525_begin_0"), val = tensor([0, 18432, 0, 0])]; tensor var_7525_end_0 = const()[name = string("op_7525_end_0"), val = tensor([1, 19456, 1, 256])]; tensor var_7525_end_mask_0 = const()[name = string("op_7525_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7525_cast_fp16 = slice_by_index(begin = var_7525_begin_0, end = var_7525_end_0, end_mask = var_7525_end_mask_0, x = value_cache)[name = string("op_7525_cast_fp16")]; tensor var_7537_axes_0 = const()[name = string("op_7537_axes_0"), val = tensor([-1])]; tensor var_7537_cast_fp16 = squeeze(axes = var_7537_axes_0, x = hidden_71_cast_fp16)[name = string("op_7537_cast_fp16")]; tensor var_7539_axes_0 = const()[name = string("op_7539_axes_0"), val = tensor([-1])]; tensor var_7539_cast_fp16 = squeeze(axes = var_7539_axes_0, x = var_7537_cast_fp16)[name = string("op_7539_cast_fp16")]; tensor hidden_states_433_axes_0 = const()[name = string("hidden_states_433_axes_0"), val = tensor([0])]; tensor hidden_states_433_cast_fp16 = expand_dims(axes = hidden_states_433_axes_0, x = var_7539_cast_fp16)[name = string("hidden_states_433_cast_fp16")]; fp16 var_7545_promoted_to_fp16 = const()[name = string("op_7545_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7551_cast_fp16 = pow(x = hidden_states_433_cast_fp16, y = var_7545_promoted_to_fp16)[name = string("op_7551_cast_fp16")]; tensor variance_145_axes_0 = const()[name = string("variance_145_axes_0"), val = tensor([-1])]; bool variance_145_keep_dims_0 = const()[name = string("variance_145_keep_dims_0"), val = bool(true)]; tensor variance_145_cast_fp16 = reduce_mean(axes = variance_145_axes_0, keep_dims = variance_145_keep_dims_0, x = var_7551_cast_fp16)[name = string("variance_145_cast_fp16")]; fp16 var_7554_to_fp16 = const()[name = string("op_7554_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7555_cast_fp16 = add(x = variance_145_cast_fp16, y = var_7554_to_fp16)[name = string("op_7555_cast_fp16")]; fp32 var_7556_epsilon_0 = const()[name = string("op_7556_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7556_cast_fp16 = rsqrt(epsilon = var_7556_epsilon_0, x = var_7555_cast_fp16)[name = string("op_7556_cast_fp16")]; tensor hidden_states_437_cast_fp16 = mul(x = hidden_states_433_cast_fp16, y = var_7556_cast_fp16)[name = string("hidden_states_437_cast_fp16")]; tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283296448)))]; tensor input_181_cast_fp16 = mul(x = const_181_to_fp16, y = hidden_states_437_cast_fp16)[name = string("input_181_cast_fp16")]; tensor layers_18_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283298560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285395776))))[name = string("layers_18_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_18_self_attn_q_proj_weight_to_fp16_palettized, x = input_181_cast_fp16)[name = string("linear_126_cast_fp16")]; tensor layers_18_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285396352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286444992))))[name = string("layers_18_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_k_proj_weight_to_fp16_palettized, x = input_181_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor layers_18_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286445568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287494208))))[name = string("layers_18_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_v_proj_weight_to_fp16_palettized, x = input_181_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor var_7573 = const()[name = string("op_7573"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_439_cast_fp16 = reshape(shape = var_7573, x = linear_126_cast_fp16)[name = string("hidden_states_439_cast_fp16")]; tensor var_7579 = const()[name = string("op_7579"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_445_cast_fp16 = reshape(shape = var_7579, x = linear_127_cast_fp16)[name = string("hidden_states_445_cast_fp16")]; tensor var_7585 = const()[name = string("op_7585"), val = tensor([1, 1, 8, 128])]; tensor v_111_cast_fp16 = reshape(shape = var_7585, x = linear_128_cast_fp16)[name = string("v_111_cast_fp16")]; fp16 var_7590_promoted_to_fp16 = const()[name = string("op_7590_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7596_cast_fp16 = pow(x = hidden_states_439_cast_fp16, y = var_7590_promoted_to_fp16)[name = string("op_7596_cast_fp16")]; tensor variance_147_axes_0 = const()[name = string("variance_147_axes_0"), val = tensor([-1])]; bool variance_147_keep_dims_0 = const()[name = string("variance_147_keep_dims_0"), val = bool(true)]; tensor variance_147_cast_fp16 = reduce_mean(axes = variance_147_axes_0, keep_dims = variance_147_keep_dims_0, x = var_7596_cast_fp16)[name = string("variance_147_cast_fp16")]; fp16 var_7599_to_fp16 = const()[name = string("op_7599_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7600_cast_fp16 = add(x = variance_147_cast_fp16, y = var_7599_to_fp16)[name = string("op_7600_cast_fp16")]; fp32 var_7601_epsilon_0 = const()[name = string("op_7601_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7601_cast_fp16 = rsqrt(epsilon = var_7601_epsilon_0, x = var_7600_cast_fp16)[name = string("op_7601_cast_fp16")]; tensor hidden_states_443_cast_fp16 = mul(x = hidden_states_439_cast_fp16, y = var_7601_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287494784)))]; tensor q_147_cast_fp16 = mul(x = const_182_to_fp16, y = hidden_states_443_cast_fp16)[name = string("q_147_cast_fp16")]; fp16 var_7608_promoted_to_fp16 = const()[name = string("op_7608_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7614_cast_fp16 = pow(x = hidden_states_445_cast_fp16, y = var_7608_promoted_to_fp16)[name = string("op_7614_cast_fp16")]; tensor variance_149_axes_0 = const()[name = string("variance_149_axes_0"), val = tensor([-1])]; bool variance_149_keep_dims_0 = const()[name = string("variance_149_keep_dims_0"), val = bool(true)]; tensor variance_149_cast_fp16 = reduce_mean(axes = variance_149_axes_0, keep_dims = variance_149_keep_dims_0, x = var_7614_cast_fp16)[name = string("variance_149_cast_fp16")]; fp16 var_7617_to_fp16 = const()[name = string("op_7617_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7618_cast_fp16 = add(x = variance_149_cast_fp16, y = var_7617_to_fp16)[name = string("op_7618_cast_fp16")]; fp32 var_7619_epsilon_0 = const()[name = string("op_7619_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7619_cast_fp16 = rsqrt(epsilon = var_7619_epsilon_0, x = var_7618_cast_fp16)[name = string("op_7619_cast_fp16")]; tensor hidden_states_449_cast_fp16 = mul(x = hidden_states_445_cast_fp16, y = var_7619_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287495104)))]; tensor k_147_cast_fp16 = mul(x = const_183_to_fp16, y = hidden_states_449_cast_fp16)[name = string("k_147_cast_fp16")]; tensor q_149_perm_0 = const()[name = string("q_149_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_149_perm_0 = const()[name = string("k_149_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_113_perm_0 = const()[name = string("v_113_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_149_cast_fp16 = transpose(perm = q_149_perm_0, x = q_147_cast_fp16)[name = string("transpose_39")]; tensor var_7636_cast_fp16 = mul(x = q_149_cast_fp16, y = cos_3_cast_fp16)[name = string("op_7636_cast_fp16")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_149_cast_fp16)[name = string("x1_73_cast_fp16")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_149_cast_fp16)[name = string("x2_73_cast_fp16")]; fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7657_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_7657_cast_fp16")]; int32 var_7659 = const()[name = string("op_7659"), val = int32(-1)]; bool var_7660_interleave_0 = const()[name = string("op_7660_interleave_0"), val = bool(false)]; tensor var_7660_cast_fp16 = concat(axis = var_7659, interleave = var_7660_interleave_0, values = (var_7657_cast_fp16, x1_73_cast_fp16))[name = string("op_7660_cast_fp16")]; tensor var_7661_cast_fp16 = mul(x = var_7660_cast_fp16, y = sin_3_cast_fp16)[name = string("op_7661_cast_fp16")]; tensor q_151_cast_fp16 = add(x = var_7636_cast_fp16, y = var_7661_cast_fp16)[name = string("q_151_cast_fp16")]; tensor k_149_cast_fp16 = transpose(perm = k_149_perm_0, x = k_147_cast_fp16)[name = string("transpose_38")]; tensor var_7664_cast_fp16 = mul(x = k_149_cast_fp16, y = cos_3_cast_fp16)[name = string("op_7664_cast_fp16")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_149_cast_fp16)[name = string("x1_75_cast_fp16")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_149_cast_fp16)[name = string("x2_75_cast_fp16")]; fp16 const_189_promoted_to_fp16 = const()[name = string("const_189_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7685_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_189_promoted_to_fp16)[name = string("op_7685_cast_fp16")]; int32 var_7687 = const()[name = string("op_7687"), val = int32(-1)]; bool var_7688_interleave_0 = const()[name = string("op_7688_interleave_0"), val = bool(false)]; tensor var_7688_cast_fp16 = concat(axis = var_7687, interleave = var_7688_interleave_0, values = (var_7685_cast_fp16, x1_75_cast_fp16))[name = string("op_7688_cast_fp16")]; tensor var_7689_cast_fp16 = mul(x = var_7688_cast_fp16, y = sin_3_cast_fp16)[name = string("op_7689_cast_fp16")]; tensor k_151_cast_fp16 = add(x = var_7664_cast_fp16, y = var_7689_cast_fp16)[name = string("k_151_cast_fp16")]; tensor var_7696 = const()[name = string("op_7696"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_37_cast_fp16 = reshape(shape = var_7696, x = k_151_cast_fp16)[name = string("nk_flat_37_cast_fp16")]; tensor var_7702 = const()[name = string("op_7702"), val = tensor([1, 1024, 1, 1])]; tensor v_113_cast_fp16 = transpose(perm = v_113_perm_0, x = v_111_cast_fp16)[name = string("transpose_37")]; tensor nv_flat_37_cast_fp16 = reshape(shape = var_7702, x = v_113_cast_fp16)[name = string("nv_flat_37_cast_fp16")]; tensor var_7711_cast_fp16 = mul(x = var_7505_cast_fp16, y = var_1194_cast_fp16)[name = string("op_7711_cast_fp16")]; tensor var_7712_cast_fp16 = mul(x = nk_flat_37_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_7712_cast_fp16")]; tensor key_cache_77_cast_fp16 = add(x = var_7711_cast_fp16, y = var_7712_cast_fp16)[name = string("key_cache_77_cast_fp16")]; tensor var_7718_cast_fp16 = mul(x = var_7525_cast_fp16, y = var_1194_cast_fp16)[name = string("op_7718_cast_fp16")]; tensor var_7719_cast_fp16 = mul(x = nv_flat_37_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_7719_cast_fp16")]; tensor value_cache_77_cast_fp16 = add(x = var_7718_cast_fp16, y = var_7719_cast_fp16)[name = string("value_cache_77_cast_fp16")]; tensor kc_109_axes_0 = const()[name = string("kc_109_axes_0"), val = tensor([2])]; tensor kc_109_cast_fp16 = squeeze(axes = kc_109_axes_0, x = key_cache_77_cast_fp16)[name = string("kc_109_cast_fp16")]; tensor var_7728 = const()[name = string("op_7728"), val = tensor([1, 8, 128, 256])]; tensor kc_111_cast_fp16 = reshape(shape = var_7728, x = kc_109_cast_fp16)[name = string("kc_111_cast_fp16")]; tensor vc_109_axes_0 = const()[name = string("vc_109_axes_0"), val = tensor([2])]; tensor vc_109_cast_fp16 = squeeze(axes = vc_109_axes_0, x = value_cache_77_cast_fp16)[name = string("vc_109_cast_fp16")]; tensor var_7736 = const()[name = string("op_7736"), val = tensor([1, 8, 128, 256])]; tensor vc_111_cast_fp16 = reshape(shape = var_7736, x = vc_109_cast_fp16)[name = string("vc_111_cast_fp16")]; tensor var_7739_axes_0 = const()[name = string("op_7739_axes_0"), val = tensor([2])]; tensor var_7739_cast_fp16 = expand_dims(axes = var_7739_axes_0, x = kc_111_cast_fp16)[name = string("op_7739_cast_fp16")]; tensor var_7747_reps_0 = const()[name = string("op_7747_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7747_cast_fp16 = tile(reps = var_7747_reps_0, x = var_7739_cast_fp16)[name = string("op_7747_cast_fp16")]; tensor var_7752 = const()[name = string("op_7752"), val = tensor([1, 16, 128, 256])]; tensor kc_113_cast_fp16 = reshape(shape = var_7752, x = var_7747_cast_fp16)[name = string("kc_113_cast_fp16")]; tensor var_7755_axes_0 = const()[name = string("op_7755_axes_0"), val = tensor([2])]; tensor var_7755_cast_fp16 = expand_dims(axes = var_7755_axes_0, x = vc_111_cast_fp16)[name = string("op_7755_cast_fp16")]; tensor var_7763_reps_0 = const()[name = string("op_7763_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7763_cast_fp16 = tile(reps = var_7763_reps_0, x = var_7755_cast_fp16)[name = string("op_7763_cast_fp16")]; tensor var_7768 = const()[name = string("op_7768"), val = tensor([1, 16, 128, 256])]; tensor vc_113_cast_fp16 = reshape(shape = var_7768, x = var_7763_cast_fp16)[name = string("vc_113_cast_fp16")]; bool var_7770_transpose_x_0 = const()[name = string("op_7770_transpose_x_0"), val = bool(false)]; bool var_7770_transpose_y_0 = const()[name = string("op_7770_transpose_y_0"), val = bool(false)]; tensor var_7770_cast_fp16 = matmul(transpose_x = var_7770_transpose_x_0, transpose_y = var_7770_transpose_y_0, x = q_151_cast_fp16, y = kc_113_cast_fp16)[name = string("op_7770_cast_fp16")]; fp16 _inversed_attn_weights_145_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_145_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_145_cast_fp16 = mul(x = var_7770_cast_fp16, y = _inversed_attn_weights_145_y_0_to_fp16)[name = string("_inversed_attn_weights_145_cast_fp16")]; tensor attn_weights_147_cast_fp16 = add(x = _inversed_attn_weights_145_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_147_cast_fp16")]; int32 var_7784 = const()[name = string("op_7784"), val = int32(-1)]; tensor attn_weights_151_cast_fp16 = softmax(axis = var_7784, x = attn_weights_147_cast_fp16)[name = string("attn_weights_151_cast_fp16")]; bool attn_output_73_transpose_x_1 = const()[name = string("attn_output_73_transpose_x_1"), val = bool(false)]; bool attn_output_73_transpose_y_1 = const()[name = string("attn_output_73_transpose_y_1"), val = bool(true)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_1, transpose_y = attn_output_73_transpose_y_1, x = attn_weights_151_cast_fp16, y = vc_113_cast_fp16)[name = string("attn_output_73_cast_fp16")]; tensor var_7793_perm_0 = const()[name = string("op_7793_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7797 = const()[name = string("op_7797"), val = tensor([1, 1, -1])]; tensor var_7793_cast_fp16 = transpose(perm = var_7793_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_36")]; tensor input_183_cast_fp16 = reshape(shape = var_7797, x = var_7793_cast_fp16)[name = string("input_183_cast_fp16")]; tensor layers_18_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287495424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289592640))))[name = string("layers_18_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_o_proj_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor var_7803_axes_0 = const()[name = string("op_7803_axes_0"), val = tensor([0])]; tensor var_7803_cast_fp16 = squeeze(axes = var_7803_axes_0, x = linear_129_cast_fp16)[name = string("op_7803_cast_fp16")]; tensor var_7805_axes_0 = const()[name = string("op_7805_axes_0"), val = tensor([0])]; tensor var_7805_cast_fp16 = squeeze(axes = var_7805_axes_0, x = var_7803_cast_fp16)[name = string("op_7805_cast_fp16")]; tensor var_7807_axes_0 = const()[name = string("op_7807_axes_0"), val = tensor([-1])]; tensor var_7807_cast_fp16 = expand_dims(axes = var_7807_axes_0, x = var_7805_cast_fp16)[name = string("op_7807_cast_fp16")]; tensor attn_4d_37_axes_0 = const()[name = string("attn_4d_37_axes_0"), val = tensor([-1])]; tensor attn_4d_37_cast_fp16 = expand_dims(axes = attn_4d_37_axes_0, x = var_7807_cast_fp16)[name = string("attn_4d_37_cast_fp16")]; tensor hidden_73_cast_fp16 = add(x = hidden_71_cast_fp16, y = attn_4d_37_cast_fp16)[name = string("hidden_73_cast_fp16")]; tensor var_7813_axes_0 = const()[name = string("op_7813_axes_0"), val = tensor([-1])]; tensor var_7813_cast_fp16 = squeeze(axes = var_7813_axes_0, x = hidden_73_cast_fp16)[name = string("op_7813_cast_fp16")]; tensor var_7815_axes_0 = const()[name = string("op_7815_axes_0"), val = tensor([-1])]; tensor var_7815_cast_fp16 = squeeze(axes = var_7815_axes_0, x = var_7813_cast_fp16)[name = string("op_7815_cast_fp16")]; tensor hidden_states_451_axes_0 = const()[name = string("hidden_states_451_axes_0"), val = tensor([0])]; tensor hidden_states_451_cast_fp16 = expand_dims(axes = hidden_states_451_axes_0, x = var_7815_cast_fp16)[name = string("hidden_states_451_cast_fp16")]; fp16 var_7821_promoted_to_fp16 = const()[name = string("op_7821_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7827_cast_fp16 = pow(x = hidden_states_451_cast_fp16, y = var_7821_promoted_to_fp16)[name = string("op_7827_cast_fp16")]; tensor variance_151_axes_0 = const()[name = string("variance_151_axes_0"), val = tensor([-1])]; bool variance_151_keep_dims_0 = const()[name = string("variance_151_keep_dims_0"), val = bool(true)]; tensor variance_151_cast_fp16 = reduce_mean(axes = variance_151_axes_0, keep_dims = variance_151_keep_dims_0, x = var_7827_cast_fp16)[name = string("variance_151_cast_fp16")]; fp16 var_7830_to_fp16 = const()[name = string("op_7830_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7831_cast_fp16 = add(x = variance_151_cast_fp16, y = var_7830_to_fp16)[name = string("op_7831_cast_fp16")]; fp32 var_7832_epsilon_0 = const()[name = string("op_7832_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7832_cast_fp16 = rsqrt(epsilon = var_7832_epsilon_0, x = var_7831_cast_fp16)[name = string("op_7832_cast_fp16")]; tensor hidden_states_455_cast_fp16 = mul(x = hidden_states_451_cast_fp16, y = var_7832_cast_fp16)[name = string("hidden_states_455_cast_fp16")]; tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289593216)))]; tensor input_185_cast_fp16 = mul(x = const_190_to_fp16, y = hidden_states_455_cast_fp16)[name = string("input_185_cast_fp16")]; tensor layers_18_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289595328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292741120))))[name = string("layers_18_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_gate_proj_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_7842_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_7842_cast_fp16")]; tensor layers_18_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292741696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295887488))))[name = string("layers_18_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_up_proj_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor input_189_cast_fp16 = mul(x = var_7842_cast_fp16, y = linear_131_cast_fp16)[name = string("input_189_cast_fp16")]; tensor layers_18_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299033856))))[name = string("layers_18_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_132_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_mlp_down_proj_weight_to_fp16_palettized, x = input_189_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor var_7849_axes_0 = const()[name = string("op_7849_axes_0"), val = tensor([0])]; tensor var_7849_cast_fp16 = squeeze(axes = var_7849_axes_0, x = linear_132_cast_fp16)[name = string("op_7849_cast_fp16")]; tensor var_7851_axes_0 = const()[name = string("op_7851_axes_0"), val = tensor([0])]; tensor var_7851_cast_fp16 = squeeze(axes = var_7851_axes_0, x = var_7849_cast_fp16)[name = string("op_7851_cast_fp16")]; tensor var_7853_axes_0 = const()[name = string("op_7853_axes_0"), val = tensor([-1])]; tensor var_7853_cast_fp16 = expand_dims(axes = var_7853_axes_0, x = var_7851_cast_fp16)[name = string("op_7853_cast_fp16")]; tensor mlp_4d_37_axes_0 = const()[name = string("mlp_4d_37_axes_0"), val = tensor([-1])]; tensor mlp_4d_37_cast_fp16 = expand_dims(axes = mlp_4d_37_axes_0, x = var_7853_cast_fp16)[name = string("mlp_4d_37_cast_fp16")]; tensor hidden_75_cast_fp16 = add(x = hidden_73_cast_fp16, y = mlp_4d_37_cast_fp16)[name = string("hidden_75_cast_fp16")]; tensor var_7867_begin_0 = const()[name = string("op_7867_begin_0"), val = tensor([0, 19456, 0, 0])]; tensor var_7867_end_0 = const()[name = string("op_7867_end_0"), val = tensor([1, 20480, 1, 256])]; tensor var_7867_end_mask_0 = const()[name = string("op_7867_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7867_cast_fp16 = slice_by_index(begin = var_7867_begin_0, end = var_7867_end_0, end_mask = var_7867_end_mask_0, x = key_cache)[name = string("op_7867_cast_fp16")]; tensor var_7887_begin_0 = const()[name = string("op_7887_begin_0"), val = tensor([0, 19456, 0, 0])]; tensor var_7887_end_0 = const()[name = string("op_7887_end_0"), val = tensor([1, 20480, 1, 256])]; tensor var_7887_end_mask_0 = const()[name = string("op_7887_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7887_cast_fp16 = slice_by_index(begin = var_7887_begin_0, end = var_7887_end_0, end_mask = var_7887_end_mask_0, x = value_cache)[name = string("op_7887_cast_fp16")]; tensor var_7899_axes_0 = const()[name = string("op_7899_axes_0"), val = tensor([-1])]; tensor var_7899_cast_fp16 = squeeze(axes = var_7899_axes_0, x = hidden_75_cast_fp16)[name = string("op_7899_cast_fp16")]; tensor var_7901_axes_0 = const()[name = string("op_7901_axes_0"), val = tensor([-1])]; tensor var_7901_cast_fp16 = squeeze(axes = var_7901_axes_0, x = var_7899_cast_fp16)[name = string("op_7901_cast_fp16")]; tensor hidden_states_457_axes_0 = const()[name = string("hidden_states_457_axes_0"), val = tensor([0])]; tensor hidden_states_457_cast_fp16 = expand_dims(axes = hidden_states_457_axes_0, x = var_7901_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; fp16 var_7907_promoted_to_fp16 = const()[name = string("op_7907_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7913_cast_fp16 = pow(x = hidden_states_457_cast_fp16, y = var_7907_promoted_to_fp16)[name = string("op_7913_cast_fp16")]; tensor variance_153_axes_0 = const()[name = string("variance_153_axes_0"), val = tensor([-1])]; bool variance_153_keep_dims_0 = const()[name = string("variance_153_keep_dims_0"), val = bool(true)]; tensor variance_153_cast_fp16 = reduce_mean(axes = variance_153_axes_0, keep_dims = variance_153_keep_dims_0, x = var_7913_cast_fp16)[name = string("variance_153_cast_fp16")]; fp16 var_7916_to_fp16 = const()[name = string("op_7916_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7917_cast_fp16 = add(x = variance_153_cast_fp16, y = var_7916_to_fp16)[name = string("op_7917_cast_fp16")]; fp32 var_7918_epsilon_0 = const()[name = string("op_7918_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7918_cast_fp16 = rsqrt(epsilon = var_7918_epsilon_0, x = var_7917_cast_fp16)[name = string("op_7918_cast_fp16")]; tensor hidden_states_461_cast_fp16 = mul(x = hidden_states_457_cast_fp16, y = var_7918_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299034432)))]; tensor input_191_cast_fp16 = mul(x = const_191_to_fp16, y = hidden_states_461_cast_fp16)[name = string("input_191_cast_fp16")]; tensor layers_19_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299036544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301133760))))[name = string("layers_19_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_19_self_attn_q_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor layers_19_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301134336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302182976))))[name = string("layers_19_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_k_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("linear_134_cast_fp16")]; tensor layers_19_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302183552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303232192))))[name = string("layers_19_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_v_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor var_7935 = const()[name = string("op_7935"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_463_cast_fp16 = reshape(shape = var_7935, x = linear_133_cast_fp16)[name = string("hidden_states_463_cast_fp16")]; tensor var_7941 = const()[name = string("op_7941"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_469_cast_fp16 = reshape(shape = var_7941, x = linear_134_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; tensor var_7947 = const()[name = string("op_7947"), val = tensor([1, 1, 8, 128])]; tensor v_117_cast_fp16 = reshape(shape = var_7947, x = linear_135_cast_fp16)[name = string("v_117_cast_fp16")]; fp16 var_7952_promoted_to_fp16 = const()[name = string("op_7952_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7958_cast_fp16 = pow(x = hidden_states_463_cast_fp16, y = var_7952_promoted_to_fp16)[name = string("op_7958_cast_fp16")]; tensor variance_155_axes_0 = const()[name = string("variance_155_axes_0"), val = tensor([-1])]; bool variance_155_keep_dims_0 = const()[name = string("variance_155_keep_dims_0"), val = bool(true)]; tensor variance_155_cast_fp16 = reduce_mean(axes = variance_155_axes_0, keep_dims = variance_155_keep_dims_0, x = var_7958_cast_fp16)[name = string("variance_155_cast_fp16")]; fp16 var_7961_to_fp16 = const()[name = string("op_7961_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7962_cast_fp16 = add(x = variance_155_cast_fp16, y = var_7961_to_fp16)[name = string("op_7962_cast_fp16")]; fp32 var_7963_epsilon_0 = const()[name = string("op_7963_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7963_cast_fp16 = rsqrt(epsilon = var_7963_epsilon_0, x = var_7962_cast_fp16)[name = string("op_7963_cast_fp16")]; tensor hidden_states_467_cast_fp16 = mul(x = hidden_states_463_cast_fp16, y = var_7963_cast_fp16)[name = string("hidden_states_467_cast_fp16")]; tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303232768)))]; tensor q_155_cast_fp16 = mul(x = const_192_to_fp16, y = hidden_states_467_cast_fp16)[name = string("q_155_cast_fp16")]; fp16 var_7970_promoted_to_fp16 = const()[name = string("op_7970_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7976_cast_fp16 = pow(x = hidden_states_469_cast_fp16, y = var_7970_promoted_to_fp16)[name = string("op_7976_cast_fp16")]; tensor variance_157_axes_0 = const()[name = string("variance_157_axes_0"), val = tensor([-1])]; bool variance_157_keep_dims_0 = const()[name = string("variance_157_keep_dims_0"), val = bool(true)]; tensor variance_157_cast_fp16 = reduce_mean(axes = variance_157_axes_0, keep_dims = variance_157_keep_dims_0, x = var_7976_cast_fp16)[name = string("variance_157_cast_fp16")]; fp16 var_7979_to_fp16 = const()[name = string("op_7979_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7980_cast_fp16 = add(x = variance_157_cast_fp16, y = var_7979_to_fp16)[name = string("op_7980_cast_fp16")]; fp32 var_7981_epsilon_0 = const()[name = string("op_7981_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7981_cast_fp16 = rsqrt(epsilon = var_7981_epsilon_0, x = var_7980_cast_fp16)[name = string("op_7981_cast_fp16")]; tensor hidden_states_473_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_7981_cast_fp16)[name = string("hidden_states_473_cast_fp16")]; tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303233088)))]; tensor k_155_cast_fp16 = mul(x = const_193_to_fp16, y = hidden_states_473_cast_fp16)[name = string("k_155_cast_fp16")]; tensor q_157_perm_0 = const()[name = string("q_157_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_157_perm_0 = const()[name = string("k_157_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_119_perm_0 = const()[name = string("v_119_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_157_cast_fp16 = transpose(perm = q_157_perm_0, x = q_155_cast_fp16)[name = string("transpose_35")]; tensor var_7998_cast_fp16 = mul(x = q_157_cast_fp16, y = cos_3_cast_fp16)[name = string("op_7998_cast_fp16")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_157_cast_fp16)[name = string("x1_77_cast_fp16")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_157_cast_fp16)[name = string("x2_77_cast_fp16")]; fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8019_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_196_promoted_to_fp16)[name = string("op_8019_cast_fp16")]; int32 var_8021 = const()[name = string("op_8021"), val = int32(-1)]; bool var_8022_interleave_0 = const()[name = string("op_8022_interleave_0"), val = bool(false)]; tensor var_8022_cast_fp16 = concat(axis = var_8021, interleave = var_8022_interleave_0, values = (var_8019_cast_fp16, x1_77_cast_fp16))[name = string("op_8022_cast_fp16")]; tensor var_8023_cast_fp16 = mul(x = var_8022_cast_fp16, y = sin_3_cast_fp16)[name = string("op_8023_cast_fp16")]; tensor q_159_cast_fp16 = add(x = var_7998_cast_fp16, y = var_8023_cast_fp16)[name = string("q_159_cast_fp16")]; tensor k_157_cast_fp16 = transpose(perm = k_157_perm_0, x = k_155_cast_fp16)[name = string("transpose_34")]; tensor var_8026_cast_fp16 = mul(x = k_157_cast_fp16, y = cos_3_cast_fp16)[name = string("op_8026_cast_fp16")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_157_cast_fp16)[name = string("x1_79_cast_fp16")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_157_cast_fp16)[name = string("x2_79_cast_fp16")]; fp16 const_199_promoted_to_fp16 = const()[name = string("const_199_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8047_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_199_promoted_to_fp16)[name = string("op_8047_cast_fp16")]; int32 var_8049 = const()[name = string("op_8049"), val = int32(-1)]; bool var_8050_interleave_0 = const()[name = string("op_8050_interleave_0"), val = bool(false)]; tensor var_8050_cast_fp16 = concat(axis = var_8049, interleave = var_8050_interleave_0, values = (var_8047_cast_fp16, x1_79_cast_fp16))[name = string("op_8050_cast_fp16")]; tensor var_8051_cast_fp16 = mul(x = var_8050_cast_fp16, y = sin_3_cast_fp16)[name = string("op_8051_cast_fp16")]; tensor k_159_cast_fp16 = add(x = var_8026_cast_fp16, y = var_8051_cast_fp16)[name = string("k_159_cast_fp16")]; tensor var_8058 = const()[name = string("op_8058"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_39_cast_fp16 = reshape(shape = var_8058, x = k_159_cast_fp16)[name = string("nk_flat_39_cast_fp16")]; tensor var_8064 = const()[name = string("op_8064"), val = tensor([1, 1024, 1, 1])]; tensor v_119_cast_fp16 = transpose(perm = v_119_perm_0, x = v_117_cast_fp16)[name = string("transpose_33")]; tensor nv_flat_39_cast_fp16 = reshape(shape = var_8064, x = v_119_cast_fp16)[name = string("nv_flat_39_cast_fp16")]; tensor var_8073_cast_fp16 = mul(x = var_7867_cast_fp16, y = var_1194_cast_fp16)[name = string("op_8073_cast_fp16")]; tensor var_8074_cast_fp16 = mul(x = nk_flat_39_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_8074_cast_fp16")]; tensor key_cache_81_cast_fp16 = add(x = var_8073_cast_fp16, y = var_8074_cast_fp16)[name = string("key_cache_81_cast_fp16")]; tensor var_8080_cast_fp16 = mul(x = var_7887_cast_fp16, y = var_1194_cast_fp16)[name = string("op_8080_cast_fp16")]; tensor var_8081_cast_fp16 = mul(x = nv_flat_39_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_8081_cast_fp16")]; tensor value_cache_81_cast_fp16 = add(x = var_8080_cast_fp16, y = var_8081_cast_fp16)[name = string("value_cache_81_cast_fp16")]; tensor kc_115_axes_0 = const()[name = string("kc_115_axes_0"), val = tensor([2])]; tensor kc_115_cast_fp16 = squeeze(axes = kc_115_axes_0, x = key_cache_81_cast_fp16)[name = string("kc_115_cast_fp16")]; tensor var_8090 = const()[name = string("op_8090"), val = tensor([1, 8, 128, 256])]; tensor kc_117_cast_fp16 = reshape(shape = var_8090, x = kc_115_cast_fp16)[name = string("kc_117_cast_fp16")]; tensor vc_115_axes_0 = const()[name = string("vc_115_axes_0"), val = tensor([2])]; tensor vc_115_cast_fp16 = squeeze(axes = vc_115_axes_0, x = value_cache_81_cast_fp16)[name = string("vc_115_cast_fp16")]; tensor var_8098 = const()[name = string("op_8098"), val = tensor([1, 8, 128, 256])]; tensor vc_117_cast_fp16 = reshape(shape = var_8098, x = vc_115_cast_fp16)[name = string("vc_117_cast_fp16")]; tensor var_8101_axes_0 = const()[name = string("op_8101_axes_0"), val = tensor([2])]; tensor var_8101_cast_fp16 = expand_dims(axes = var_8101_axes_0, x = kc_117_cast_fp16)[name = string("op_8101_cast_fp16")]; tensor var_8109_reps_0 = const()[name = string("op_8109_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8109_cast_fp16 = tile(reps = var_8109_reps_0, x = var_8101_cast_fp16)[name = string("op_8109_cast_fp16")]; tensor var_8114 = const()[name = string("op_8114"), val = tensor([1, 16, 128, 256])]; tensor kc_119_cast_fp16 = reshape(shape = var_8114, x = var_8109_cast_fp16)[name = string("kc_119_cast_fp16")]; tensor var_8117_axes_0 = const()[name = string("op_8117_axes_0"), val = tensor([2])]; tensor var_8117_cast_fp16 = expand_dims(axes = var_8117_axes_0, x = vc_117_cast_fp16)[name = string("op_8117_cast_fp16")]; tensor var_8125_reps_0 = const()[name = string("op_8125_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8125_cast_fp16 = tile(reps = var_8125_reps_0, x = var_8117_cast_fp16)[name = string("op_8125_cast_fp16")]; tensor var_8130 = const()[name = string("op_8130"), val = tensor([1, 16, 128, 256])]; tensor vc_119_cast_fp16 = reshape(shape = var_8130, x = var_8125_cast_fp16)[name = string("vc_119_cast_fp16")]; bool var_8132_transpose_x_0 = const()[name = string("op_8132_transpose_x_0"), val = bool(false)]; bool var_8132_transpose_y_0 = const()[name = string("op_8132_transpose_y_0"), val = bool(false)]; tensor var_8132_cast_fp16 = matmul(transpose_x = var_8132_transpose_x_0, transpose_y = var_8132_transpose_y_0, x = q_159_cast_fp16, y = kc_119_cast_fp16)[name = string("op_8132_cast_fp16")]; fp16 _inversed_attn_weights_153_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_153_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_153_cast_fp16 = mul(x = var_8132_cast_fp16, y = _inversed_attn_weights_153_y_0_to_fp16)[name = string("_inversed_attn_weights_153_cast_fp16")]; tensor attn_weights_155_cast_fp16 = add(x = _inversed_attn_weights_153_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_155_cast_fp16")]; int32 var_8146 = const()[name = string("op_8146"), val = int32(-1)]; tensor attn_weights_159_cast_fp16 = softmax(axis = var_8146, x = attn_weights_155_cast_fp16)[name = string("attn_weights_159_cast_fp16")]; bool attn_output_77_transpose_x_1 = const()[name = string("attn_output_77_transpose_x_1"), val = bool(false)]; bool attn_output_77_transpose_y_1 = const()[name = string("attn_output_77_transpose_y_1"), val = bool(true)]; tensor attn_output_77_cast_fp16 = matmul(transpose_x = attn_output_77_transpose_x_1, transpose_y = attn_output_77_transpose_y_1, x = attn_weights_159_cast_fp16, y = vc_119_cast_fp16)[name = string("attn_output_77_cast_fp16")]; tensor var_8155_perm_0 = const()[name = string("op_8155_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8159 = const()[name = string("op_8159"), val = tensor([1, 1, -1])]; tensor var_8155_cast_fp16 = transpose(perm = var_8155_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_32")]; tensor input_193_cast_fp16 = reshape(shape = var_8159, x = var_8155_cast_fp16)[name = string("input_193_cast_fp16")]; tensor layers_19_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303233408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305330624))))[name = string("layers_19_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_o_proj_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor var_8165_axes_0 = const()[name = string("op_8165_axes_0"), val = tensor([0])]; tensor var_8165_cast_fp16 = squeeze(axes = var_8165_axes_0, x = linear_136_cast_fp16)[name = string("op_8165_cast_fp16")]; tensor var_8167_axes_0 = const()[name = string("op_8167_axes_0"), val = tensor([0])]; tensor var_8167_cast_fp16 = squeeze(axes = var_8167_axes_0, x = var_8165_cast_fp16)[name = string("op_8167_cast_fp16")]; tensor var_8169_axes_0 = const()[name = string("op_8169_axes_0"), val = tensor([-1])]; tensor var_8169_cast_fp16 = expand_dims(axes = var_8169_axes_0, x = var_8167_cast_fp16)[name = string("op_8169_cast_fp16")]; tensor attn_4d_39_axes_0 = const()[name = string("attn_4d_39_axes_0"), val = tensor([-1])]; tensor attn_4d_39_cast_fp16 = expand_dims(axes = attn_4d_39_axes_0, x = var_8169_cast_fp16)[name = string("attn_4d_39_cast_fp16")]; tensor hidden_77_cast_fp16 = add(x = hidden_75_cast_fp16, y = attn_4d_39_cast_fp16)[name = string("hidden_77_cast_fp16")]; tensor var_8175_axes_0 = const()[name = string("op_8175_axes_0"), val = tensor([-1])]; tensor var_8175_cast_fp16 = squeeze(axes = var_8175_axes_0, x = hidden_77_cast_fp16)[name = string("op_8175_cast_fp16")]; tensor var_8177_axes_0 = const()[name = string("op_8177_axes_0"), val = tensor([-1])]; tensor var_8177_cast_fp16 = squeeze(axes = var_8177_axes_0, x = var_8175_cast_fp16)[name = string("op_8177_cast_fp16")]; tensor hidden_states_475_axes_0 = const()[name = string("hidden_states_475_axes_0"), val = tensor([0])]; tensor hidden_states_475_cast_fp16 = expand_dims(axes = hidden_states_475_axes_0, x = var_8177_cast_fp16)[name = string("hidden_states_475_cast_fp16")]; fp16 var_8183_promoted_to_fp16 = const()[name = string("op_8183_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8189_cast_fp16 = pow(x = hidden_states_475_cast_fp16, y = var_8183_promoted_to_fp16)[name = string("op_8189_cast_fp16")]; tensor variance_159_axes_0 = const()[name = string("variance_159_axes_0"), val = tensor([-1])]; bool variance_159_keep_dims_0 = const()[name = string("variance_159_keep_dims_0"), val = bool(true)]; tensor variance_159_cast_fp16 = reduce_mean(axes = variance_159_axes_0, keep_dims = variance_159_keep_dims_0, x = var_8189_cast_fp16)[name = string("variance_159_cast_fp16")]; fp16 var_8192_to_fp16 = const()[name = string("op_8192_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8193_cast_fp16 = add(x = variance_159_cast_fp16, y = var_8192_to_fp16)[name = string("op_8193_cast_fp16")]; fp32 var_8194_epsilon_0 = const()[name = string("op_8194_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8194_cast_fp16 = rsqrt(epsilon = var_8194_epsilon_0, x = var_8193_cast_fp16)[name = string("op_8194_cast_fp16")]; tensor hidden_states_479_cast_fp16 = mul(x = hidden_states_475_cast_fp16, y = var_8194_cast_fp16)[name = string("hidden_states_479_cast_fp16")]; tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305331200)))]; tensor input_195_cast_fp16 = mul(x = const_200_to_fp16, y = hidden_states_479_cast_fp16)[name = string("input_195_cast_fp16")]; tensor layers_19_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308479104))))[name = string("layers_19_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_gate_proj_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_8204_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_8204_cast_fp16")]; tensor layers_19_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308479680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311625472))))[name = string("layers_19_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_up_proj_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor input_199_cast_fp16 = mul(x = var_8204_cast_fp16, y = linear_138_cast_fp16)[name = string("input_199_cast_fp16")]; tensor layers_19_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311626048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314771840))))[name = string("layers_19_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_mlp_down_proj_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor var_8211_axes_0 = const()[name = string("op_8211_axes_0"), val = tensor([0])]; tensor var_8211_cast_fp16 = squeeze(axes = var_8211_axes_0, x = linear_139_cast_fp16)[name = string("op_8211_cast_fp16")]; tensor var_8213_axes_0 = const()[name = string("op_8213_axes_0"), val = tensor([0])]; tensor var_8213_cast_fp16 = squeeze(axes = var_8213_axes_0, x = var_8211_cast_fp16)[name = string("op_8213_cast_fp16")]; tensor var_8215_axes_0 = const()[name = string("op_8215_axes_0"), val = tensor([-1])]; tensor var_8215_cast_fp16 = expand_dims(axes = var_8215_axes_0, x = var_8213_cast_fp16)[name = string("op_8215_cast_fp16")]; tensor mlp_4d_39_axes_0 = const()[name = string("mlp_4d_39_axes_0"), val = tensor([-1])]; tensor mlp_4d_39_cast_fp16 = expand_dims(axes = mlp_4d_39_axes_0, x = var_8215_cast_fp16)[name = string("mlp_4d_39_cast_fp16")]; tensor hidden_79_cast_fp16 = add(x = hidden_77_cast_fp16, y = mlp_4d_39_cast_fp16)[name = string("hidden_79_cast_fp16")]; tensor var_8229_begin_0 = const()[name = string("op_8229_begin_0"), val = tensor([0, 20480, 0, 0])]; tensor var_8229_end_0 = const()[name = string("op_8229_end_0"), val = tensor([1, 21504, 1, 256])]; tensor var_8229_end_mask_0 = const()[name = string("op_8229_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8229_cast_fp16 = slice_by_index(begin = var_8229_begin_0, end = var_8229_end_0, end_mask = var_8229_end_mask_0, x = key_cache)[name = string("op_8229_cast_fp16")]; tensor var_8249_begin_0 = const()[name = string("op_8249_begin_0"), val = tensor([0, 20480, 0, 0])]; tensor var_8249_end_0 = const()[name = string("op_8249_end_0"), val = tensor([1, 21504, 1, 256])]; tensor var_8249_end_mask_0 = const()[name = string("op_8249_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8249_cast_fp16 = slice_by_index(begin = var_8249_begin_0, end = var_8249_end_0, end_mask = var_8249_end_mask_0, x = value_cache)[name = string("op_8249_cast_fp16")]; tensor var_8261_axes_0 = const()[name = string("op_8261_axes_0"), val = tensor([-1])]; tensor var_8261_cast_fp16 = squeeze(axes = var_8261_axes_0, x = hidden_79_cast_fp16)[name = string("op_8261_cast_fp16")]; tensor var_8263_axes_0 = const()[name = string("op_8263_axes_0"), val = tensor([-1])]; tensor var_8263_cast_fp16 = squeeze(axes = var_8263_axes_0, x = var_8261_cast_fp16)[name = string("op_8263_cast_fp16")]; tensor hidden_states_481_axes_0 = const()[name = string("hidden_states_481_axes_0"), val = tensor([0])]; tensor hidden_states_481_cast_fp16 = expand_dims(axes = hidden_states_481_axes_0, x = var_8263_cast_fp16)[name = string("hidden_states_481_cast_fp16")]; fp16 var_8269_promoted_to_fp16 = const()[name = string("op_8269_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8275_cast_fp16 = pow(x = hidden_states_481_cast_fp16, y = var_8269_promoted_to_fp16)[name = string("op_8275_cast_fp16")]; tensor variance_161_axes_0 = const()[name = string("variance_161_axes_0"), val = tensor([-1])]; bool variance_161_keep_dims_0 = const()[name = string("variance_161_keep_dims_0"), val = bool(true)]; tensor variance_161_cast_fp16 = reduce_mean(axes = variance_161_axes_0, keep_dims = variance_161_keep_dims_0, x = var_8275_cast_fp16)[name = string("variance_161_cast_fp16")]; fp16 var_8278_to_fp16 = const()[name = string("op_8278_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8279_cast_fp16 = add(x = variance_161_cast_fp16, y = var_8278_to_fp16)[name = string("op_8279_cast_fp16")]; fp32 var_8280_epsilon_0 = const()[name = string("op_8280_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8280_cast_fp16 = rsqrt(epsilon = var_8280_epsilon_0, x = var_8279_cast_fp16)[name = string("op_8280_cast_fp16")]; tensor hidden_states_485_cast_fp16 = mul(x = hidden_states_481_cast_fp16, y = var_8280_cast_fp16)[name = string("hidden_states_485_cast_fp16")]; tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314772416)))]; tensor input_201_cast_fp16 = mul(x = const_201_to_fp16, y = hidden_states_485_cast_fp16)[name = string("input_201_cast_fp16")]; tensor layers_20_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314774528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316871744))))[name = string("layers_20_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_20_self_attn_q_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor layers_20_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316872320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317920960))))[name = string("layers_20_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_k_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor layers_20_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317921536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318970176))))[name = string("layers_20_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_v_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_142_cast_fp16")]; tensor var_8297 = const()[name = string("op_8297"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_487_cast_fp16 = reshape(shape = var_8297, x = linear_140_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; tensor var_8303 = const()[name = string("op_8303"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_493_cast_fp16 = reshape(shape = var_8303, x = linear_141_cast_fp16)[name = string("hidden_states_493_cast_fp16")]; tensor var_8309 = const()[name = string("op_8309"), val = tensor([1, 1, 8, 128])]; tensor v_123_cast_fp16 = reshape(shape = var_8309, x = linear_142_cast_fp16)[name = string("v_123_cast_fp16")]; fp16 var_8314_promoted_to_fp16 = const()[name = string("op_8314_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8320_cast_fp16 = pow(x = hidden_states_487_cast_fp16, y = var_8314_promoted_to_fp16)[name = string("op_8320_cast_fp16")]; tensor variance_163_axes_0 = const()[name = string("variance_163_axes_0"), val = tensor([-1])]; bool variance_163_keep_dims_0 = const()[name = string("variance_163_keep_dims_0"), val = bool(true)]; tensor variance_163_cast_fp16 = reduce_mean(axes = variance_163_axes_0, keep_dims = variance_163_keep_dims_0, x = var_8320_cast_fp16)[name = string("variance_163_cast_fp16")]; fp16 var_8323_to_fp16 = const()[name = string("op_8323_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8324_cast_fp16 = add(x = variance_163_cast_fp16, y = var_8323_to_fp16)[name = string("op_8324_cast_fp16")]; fp32 var_8325_epsilon_0 = const()[name = string("op_8325_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8325_cast_fp16 = rsqrt(epsilon = var_8325_epsilon_0, x = var_8324_cast_fp16)[name = string("op_8325_cast_fp16")]; tensor hidden_states_491_cast_fp16 = mul(x = hidden_states_487_cast_fp16, y = var_8325_cast_fp16)[name = string("hidden_states_491_cast_fp16")]; tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318970752)))]; tensor q_163_cast_fp16 = mul(x = const_202_to_fp16, y = hidden_states_491_cast_fp16)[name = string("q_163_cast_fp16")]; fp16 var_8332_promoted_to_fp16 = const()[name = string("op_8332_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8338_cast_fp16 = pow(x = hidden_states_493_cast_fp16, y = var_8332_promoted_to_fp16)[name = string("op_8338_cast_fp16")]; tensor variance_165_axes_0 = const()[name = string("variance_165_axes_0"), val = tensor([-1])]; bool variance_165_keep_dims_0 = const()[name = string("variance_165_keep_dims_0"), val = bool(true)]; tensor variance_165_cast_fp16 = reduce_mean(axes = variance_165_axes_0, keep_dims = variance_165_keep_dims_0, x = var_8338_cast_fp16)[name = string("variance_165_cast_fp16")]; fp16 var_8341_to_fp16 = const()[name = string("op_8341_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8342_cast_fp16 = add(x = variance_165_cast_fp16, y = var_8341_to_fp16)[name = string("op_8342_cast_fp16")]; fp32 var_8343_epsilon_0 = const()[name = string("op_8343_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8343_cast_fp16 = rsqrt(epsilon = var_8343_epsilon_0, x = var_8342_cast_fp16)[name = string("op_8343_cast_fp16")]; tensor hidden_states_497_cast_fp16 = mul(x = hidden_states_493_cast_fp16, y = var_8343_cast_fp16)[name = string("hidden_states_497_cast_fp16")]; tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318971072)))]; tensor k_163_cast_fp16 = mul(x = const_203_to_fp16, y = hidden_states_497_cast_fp16)[name = string("k_163_cast_fp16")]; tensor q_165_perm_0 = const()[name = string("q_165_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_165_perm_0 = const()[name = string("k_165_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_125_perm_0 = const()[name = string("v_125_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_165_cast_fp16 = transpose(perm = q_165_perm_0, x = q_163_cast_fp16)[name = string("transpose_31")]; tensor var_8360_cast_fp16 = mul(x = q_165_cast_fp16, y = cos_3_cast_fp16)[name = string("op_8360_cast_fp16")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_165_cast_fp16)[name = string("x1_81_cast_fp16")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_165_cast_fp16)[name = string("x2_81_cast_fp16")]; fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8381_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_8381_cast_fp16")]; int32 var_8383 = const()[name = string("op_8383"), val = int32(-1)]; bool var_8384_interleave_0 = const()[name = string("op_8384_interleave_0"), val = bool(false)]; tensor var_8384_cast_fp16 = concat(axis = var_8383, interleave = var_8384_interleave_0, values = (var_8381_cast_fp16, x1_81_cast_fp16))[name = string("op_8384_cast_fp16")]; tensor var_8385_cast_fp16 = mul(x = var_8384_cast_fp16, y = sin_3_cast_fp16)[name = string("op_8385_cast_fp16")]; tensor q_167_cast_fp16 = add(x = var_8360_cast_fp16, y = var_8385_cast_fp16)[name = string("q_167_cast_fp16")]; tensor k_165_cast_fp16 = transpose(perm = k_165_perm_0, x = k_163_cast_fp16)[name = string("transpose_30")]; tensor var_8388_cast_fp16 = mul(x = k_165_cast_fp16, y = cos_3_cast_fp16)[name = string("op_8388_cast_fp16")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_165_cast_fp16)[name = string("x1_83_cast_fp16")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_165_cast_fp16)[name = string("x2_83_cast_fp16")]; fp16 const_209_promoted_to_fp16 = const()[name = string("const_209_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8409_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_209_promoted_to_fp16)[name = string("op_8409_cast_fp16")]; int32 var_8411 = const()[name = string("op_8411"), val = int32(-1)]; bool var_8412_interleave_0 = const()[name = string("op_8412_interleave_0"), val = bool(false)]; tensor var_8412_cast_fp16 = concat(axis = var_8411, interleave = var_8412_interleave_0, values = (var_8409_cast_fp16, x1_83_cast_fp16))[name = string("op_8412_cast_fp16")]; tensor var_8413_cast_fp16 = mul(x = var_8412_cast_fp16, y = sin_3_cast_fp16)[name = string("op_8413_cast_fp16")]; tensor k_167_cast_fp16 = add(x = var_8388_cast_fp16, y = var_8413_cast_fp16)[name = string("k_167_cast_fp16")]; tensor var_8420 = const()[name = string("op_8420"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_41_cast_fp16 = reshape(shape = var_8420, x = k_167_cast_fp16)[name = string("nk_flat_41_cast_fp16")]; tensor var_8426 = const()[name = string("op_8426"), val = tensor([1, 1024, 1, 1])]; tensor v_125_cast_fp16 = transpose(perm = v_125_perm_0, x = v_123_cast_fp16)[name = string("transpose_29")]; tensor nv_flat_41_cast_fp16 = reshape(shape = var_8426, x = v_125_cast_fp16)[name = string("nv_flat_41_cast_fp16")]; tensor var_8435_cast_fp16 = mul(x = var_8229_cast_fp16, y = var_1194_cast_fp16)[name = string("op_8435_cast_fp16")]; tensor var_8436_cast_fp16 = mul(x = nk_flat_41_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_8436_cast_fp16")]; tensor key_cache_85_cast_fp16 = add(x = var_8435_cast_fp16, y = var_8436_cast_fp16)[name = string("key_cache_85_cast_fp16")]; tensor var_8442_cast_fp16 = mul(x = var_8249_cast_fp16, y = var_1194_cast_fp16)[name = string("op_8442_cast_fp16")]; tensor var_8443_cast_fp16 = mul(x = nv_flat_41_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_8443_cast_fp16")]; tensor value_cache_85_cast_fp16 = add(x = var_8442_cast_fp16, y = var_8443_cast_fp16)[name = string("value_cache_85_cast_fp16")]; tensor kc_121_axes_0 = const()[name = string("kc_121_axes_0"), val = tensor([2])]; tensor kc_121_cast_fp16 = squeeze(axes = kc_121_axes_0, x = key_cache_85_cast_fp16)[name = string("kc_121_cast_fp16")]; tensor var_8452 = const()[name = string("op_8452"), val = tensor([1, 8, 128, 256])]; tensor kc_123_cast_fp16 = reshape(shape = var_8452, x = kc_121_cast_fp16)[name = string("kc_123_cast_fp16")]; tensor vc_121_axes_0 = const()[name = string("vc_121_axes_0"), val = tensor([2])]; tensor vc_121_cast_fp16 = squeeze(axes = vc_121_axes_0, x = value_cache_85_cast_fp16)[name = string("vc_121_cast_fp16")]; tensor var_8460 = const()[name = string("op_8460"), val = tensor([1, 8, 128, 256])]; tensor vc_123_cast_fp16 = reshape(shape = var_8460, x = vc_121_cast_fp16)[name = string("vc_123_cast_fp16")]; tensor var_8463_axes_0 = const()[name = string("op_8463_axes_0"), val = tensor([2])]; tensor var_8463_cast_fp16 = expand_dims(axes = var_8463_axes_0, x = kc_123_cast_fp16)[name = string("op_8463_cast_fp16")]; tensor var_8471_reps_0 = const()[name = string("op_8471_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8471_cast_fp16 = tile(reps = var_8471_reps_0, x = var_8463_cast_fp16)[name = string("op_8471_cast_fp16")]; tensor var_8476 = const()[name = string("op_8476"), val = tensor([1, 16, 128, 256])]; tensor kc_125_cast_fp16 = reshape(shape = var_8476, x = var_8471_cast_fp16)[name = string("kc_125_cast_fp16")]; tensor var_8479_axes_0 = const()[name = string("op_8479_axes_0"), val = tensor([2])]; tensor var_8479_cast_fp16 = expand_dims(axes = var_8479_axes_0, x = vc_123_cast_fp16)[name = string("op_8479_cast_fp16")]; tensor var_8487_reps_0 = const()[name = string("op_8487_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8487_cast_fp16 = tile(reps = var_8487_reps_0, x = var_8479_cast_fp16)[name = string("op_8487_cast_fp16")]; tensor var_8492 = const()[name = string("op_8492"), val = tensor([1, 16, 128, 256])]; tensor vc_125_cast_fp16 = reshape(shape = var_8492, x = var_8487_cast_fp16)[name = string("vc_125_cast_fp16")]; bool var_8494_transpose_x_0 = const()[name = string("op_8494_transpose_x_0"), val = bool(false)]; bool var_8494_transpose_y_0 = const()[name = string("op_8494_transpose_y_0"), val = bool(false)]; tensor var_8494_cast_fp16 = matmul(transpose_x = var_8494_transpose_x_0, transpose_y = var_8494_transpose_y_0, x = q_167_cast_fp16, y = kc_125_cast_fp16)[name = string("op_8494_cast_fp16")]; fp16 _inversed_attn_weights_161_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_161_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_161_cast_fp16 = mul(x = var_8494_cast_fp16, y = _inversed_attn_weights_161_y_0_to_fp16)[name = string("_inversed_attn_weights_161_cast_fp16")]; tensor attn_weights_163_cast_fp16 = add(x = _inversed_attn_weights_161_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_163_cast_fp16")]; int32 var_8508 = const()[name = string("op_8508"), val = int32(-1)]; tensor attn_weights_167_cast_fp16 = softmax(axis = var_8508, x = attn_weights_163_cast_fp16)[name = string("attn_weights_167_cast_fp16")]; bool attn_output_81_transpose_x_1 = const()[name = string("attn_output_81_transpose_x_1"), val = bool(false)]; bool attn_output_81_transpose_y_1 = const()[name = string("attn_output_81_transpose_y_1"), val = bool(true)]; tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_1, transpose_y = attn_output_81_transpose_y_1, x = attn_weights_167_cast_fp16, y = vc_125_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_8517_perm_0 = const()[name = string("op_8517_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8521 = const()[name = string("op_8521"), val = tensor([1, 1, -1])]; tensor var_8517_cast_fp16 = transpose(perm = var_8517_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_28")]; tensor input_203_cast_fp16 = reshape(shape = var_8521, x = var_8517_cast_fp16)[name = string("input_203_cast_fp16")]; tensor layers_20_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318971392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321068608))))[name = string("layers_20_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_o_proj_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor var_8527_axes_0 = const()[name = string("op_8527_axes_0"), val = tensor([0])]; tensor var_8527_cast_fp16 = squeeze(axes = var_8527_axes_0, x = linear_143_cast_fp16)[name = string("op_8527_cast_fp16")]; tensor var_8529_axes_0 = const()[name = string("op_8529_axes_0"), val = tensor([0])]; tensor var_8529_cast_fp16 = squeeze(axes = var_8529_axes_0, x = var_8527_cast_fp16)[name = string("op_8529_cast_fp16")]; tensor var_8531_axes_0 = const()[name = string("op_8531_axes_0"), val = tensor([-1])]; tensor var_8531_cast_fp16 = expand_dims(axes = var_8531_axes_0, x = var_8529_cast_fp16)[name = string("op_8531_cast_fp16")]; tensor attn_4d_41_axes_0 = const()[name = string("attn_4d_41_axes_0"), val = tensor([-1])]; tensor attn_4d_41_cast_fp16 = expand_dims(axes = attn_4d_41_axes_0, x = var_8531_cast_fp16)[name = string("attn_4d_41_cast_fp16")]; tensor hidden_81_cast_fp16 = add(x = hidden_79_cast_fp16, y = attn_4d_41_cast_fp16)[name = string("hidden_81_cast_fp16")]; tensor var_8537_axes_0 = const()[name = string("op_8537_axes_0"), val = tensor([-1])]; tensor var_8537_cast_fp16 = squeeze(axes = var_8537_axes_0, x = hidden_81_cast_fp16)[name = string("op_8537_cast_fp16")]; tensor var_8539_axes_0 = const()[name = string("op_8539_axes_0"), val = tensor([-1])]; tensor var_8539_cast_fp16 = squeeze(axes = var_8539_axes_0, x = var_8537_cast_fp16)[name = string("op_8539_cast_fp16")]; tensor hidden_states_499_axes_0 = const()[name = string("hidden_states_499_axes_0"), val = tensor([0])]; tensor hidden_states_499_cast_fp16 = expand_dims(axes = hidden_states_499_axes_0, x = var_8539_cast_fp16)[name = string("hidden_states_499_cast_fp16")]; fp16 var_8545_promoted_to_fp16 = const()[name = string("op_8545_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8551_cast_fp16 = pow(x = hidden_states_499_cast_fp16, y = var_8545_promoted_to_fp16)[name = string("op_8551_cast_fp16")]; tensor variance_167_axes_0 = const()[name = string("variance_167_axes_0"), val = tensor([-1])]; bool variance_167_keep_dims_0 = const()[name = string("variance_167_keep_dims_0"), val = bool(true)]; tensor variance_167_cast_fp16 = reduce_mean(axes = variance_167_axes_0, keep_dims = variance_167_keep_dims_0, x = var_8551_cast_fp16)[name = string("variance_167_cast_fp16")]; fp16 var_8554_to_fp16 = const()[name = string("op_8554_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8555_cast_fp16 = add(x = variance_167_cast_fp16, y = var_8554_to_fp16)[name = string("op_8555_cast_fp16")]; fp32 var_8556_epsilon_0 = const()[name = string("op_8556_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8556_cast_fp16 = rsqrt(epsilon = var_8556_epsilon_0, x = var_8555_cast_fp16)[name = string("op_8556_cast_fp16")]; tensor hidden_states_503_cast_fp16 = mul(x = hidden_states_499_cast_fp16, y = var_8556_cast_fp16)[name = string("hidden_states_503_cast_fp16")]; tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321069184)))]; tensor input_205_cast_fp16 = mul(x = const_210_to_fp16, y = hidden_states_503_cast_fp16)[name = string("input_205_cast_fp16")]; tensor layers_20_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321071296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324217088))))[name = string("layers_20_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_gate_proj_weight_to_fp16_palettized, x = input_205_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_8566_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_8566_cast_fp16")]; tensor layers_20_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324217664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327363456))))[name = string("layers_20_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_up_proj_weight_to_fp16_palettized, x = input_205_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor input_209_cast_fp16 = mul(x = var_8566_cast_fp16, y = linear_145_cast_fp16)[name = string("input_209_cast_fp16")]; tensor layers_20_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327364032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330509824))))[name = string("layers_20_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_146_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_mlp_down_proj_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor var_8573_axes_0 = const()[name = string("op_8573_axes_0"), val = tensor([0])]; tensor var_8573_cast_fp16 = squeeze(axes = var_8573_axes_0, x = linear_146_cast_fp16)[name = string("op_8573_cast_fp16")]; tensor var_8575_axes_0 = const()[name = string("op_8575_axes_0"), val = tensor([0])]; tensor var_8575_cast_fp16 = squeeze(axes = var_8575_axes_0, x = var_8573_cast_fp16)[name = string("op_8575_cast_fp16")]; tensor var_8577_axes_0 = const()[name = string("op_8577_axes_0"), val = tensor([-1])]; tensor var_8577_cast_fp16 = expand_dims(axes = var_8577_axes_0, x = var_8575_cast_fp16)[name = string("op_8577_cast_fp16")]; tensor mlp_4d_41_axes_0 = const()[name = string("mlp_4d_41_axes_0"), val = tensor([-1])]; tensor mlp_4d_41_cast_fp16 = expand_dims(axes = mlp_4d_41_axes_0, x = var_8577_cast_fp16)[name = string("mlp_4d_41_cast_fp16")]; tensor hidden_83_cast_fp16 = add(x = hidden_81_cast_fp16, y = mlp_4d_41_cast_fp16)[name = string("hidden_83_cast_fp16")]; tensor var_8591_begin_0 = const()[name = string("op_8591_begin_0"), val = tensor([0, 21504, 0, 0])]; tensor var_8591_end_0 = const()[name = string("op_8591_end_0"), val = tensor([1, 22528, 1, 256])]; tensor var_8591_end_mask_0 = const()[name = string("op_8591_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8591_cast_fp16 = slice_by_index(begin = var_8591_begin_0, end = var_8591_end_0, end_mask = var_8591_end_mask_0, x = key_cache)[name = string("op_8591_cast_fp16")]; tensor var_8611_begin_0 = const()[name = string("op_8611_begin_0"), val = tensor([0, 21504, 0, 0])]; tensor var_8611_end_0 = const()[name = string("op_8611_end_0"), val = tensor([1, 22528, 1, 256])]; tensor var_8611_end_mask_0 = const()[name = string("op_8611_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8611_cast_fp16 = slice_by_index(begin = var_8611_begin_0, end = var_8611_end_0, end_mask = var_8611_end_mask_0, x = value_cache)[name = string("op_8611_cast_fp16")]; tensor var_8623_axes_0 = const()[name = string("op_8623_axes_0"), val = tensor([-1])]; tensor var_8623_cast_fp16 = squeeze(axes = var_8623_axes_0, x = hidden_83_cast_fp16)[name = string("op_8623_cast_fp16")]; tensor var_8625_axes_0 = const()[name = string("op_8625_axes_0"), val = tensor([-1])]; tensor var_8625_cast_fp16 = squeeze(axes = var_8625_axes_0, x = var_8623_cast_fp16)[name = string("op_8625_cast_fp16")]; tensor hidden_states_505_axes_0 = const()[name = string("hidden_states_505_axes_0"), val = tensor([0])]; tensor hidden_states_505_cast_fp16 = expand_dims(axes = hidden_states_505_axes_0, x = var_8625_cast_fp16)[name = string("hidden_states_505_cast_fp16")]; fp16 var_8631_promoted_to_fp16 = const()[name = string("op_8631_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8637_cast_fp16 = pow(x = hidden_states_505_cast_fp16, y = var_8631_promoted_to_fp16)[name = string("op_8637_cast_fp16")]; tensor variance_169_axes_0 = const()[name = string("variance_169_axes_0"), val = tensor([-1])]; bool variance_169_keep_dims_0 = const()[name = string("variance_169_keep_dims_0"), val = bool(true)]; tensor variance_169_cast_fp16 = reduce_mean(axes = variance_169_axes_0, keep_dims = variance_169_keep_dims_0, x = var_8637_cast_fp16)[name = string("variance_169_cast_fp16")]; fp16 var_8640_to_fp16 = const()[name = string("op_8640_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8641_cast_fp16 = add(x = variance_169_cast_fp16, y = var_8640_to_fp16)[name = string("op_8641_cast_fp16")]; fp32 var_8642_epsilon_0 = const()[name = string("op_8642_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8642_cast_fp16 = rsqrt(epsilon = var_8642_epsilon_0, x = var_8641_cast_fp16)[name = string("op_8642_cast_fp16")]; tensor hidden_states_509_cast_fp16 = mul(x = hidden_states_505_cast_fp16, y = var_8642_cast_fp16)[name = string("hidden_states_509_cast_fp16")]; tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330510400)))]; tensor input_211_cast_fp16 = mul(x = const_211_to_fp16, y = hidden_states_509_cast_fp16)[name = string("input_211_cast_fp16")]; tensor layers_21_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332609728))))[name = string("layers_21_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_21_self_attn_q_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor layers_21_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332610304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333658944))))[name = string("layers_21_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_k_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor layers_21_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333659520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334708160))))[name = string("layers_21_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_v_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor var_8659 = const()[name = string("op_8659"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_511_cast_fp16 = reshape(shape = var_8659, x = linear_147_cast_fp16)[name = string("hidden_states_511_cast_fp16")]; tensor var_8665 = const()[name = string("op_8665"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_517_cast_fp16 = reshape(shape = var_8665, x = linear_148_cast_fp16)[name = string("hidden_states_517_cast_fp16")]; tensor var_8671 = const()[name = string("op_8671"), val = tensor([1, 1, 8, 128])]; tensor v_129_cast_fp16 = reshape(shape = var_8671, x = linear_149_cast_fp16)[name = string("v_129_cast_fp16")]; fp16 var_8676_promoted_to_fp16 = const()[name = string("op_8676_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8682_cast_fp16 = pow(x = hidden_states_511_cast_fp16, y = var_8676_promoted_to_fp16)[name = string("op_8682_cast_fp16")]; tensor variance_171_axes_0 = const()[name = string("variance_171_axes_0"), val = tensor([-1])]; bool variance_171_keep_dims_0 = const()[name = string("variance_171_keep_dims_0"), val = bool(true)]; tensor variance_171_cast_fp16 = reduce_mean(axes = variance_171_axes_0, keep_dims = variance_171_keep_dims_0, x = var_8682_cast_fp16)[name = string("variance_171_cast_fp16")]; fp16 var_8685_to_fp16 = const()[name = string("op_8685_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8686_cast_fp16 = add(x = variance_171_cast_fp16, y = var_8685_to_fp16)[name = string("op_8686_cast_fp16")]; fp32 var_8687_epsilon_0 = const()[name = string("op_8687_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8687_cast_fp16 = rsqrt(epsilon = var_8687_epsilon_0, x = var_8686_cast_fp16)[name = string("op_8687_cast_fp16")]; tensor hidden_states_515_cast_fp16 = mul(x = hidden_states_511_cast_fp16, y = var_8687_cast_fp16)[name = string("hidden_states_515_cast_fp16")]; tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334708736)))]; tensor q_171_cast_fp16 = mul(x = const_212_to_fp16, y = hidden_states_515_cast_fp16)[name = string("q_171_cast_fp16")]; fp16 var_8694_promoted_to_fp16 = const()[name = string("op_8694_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8700_cast_fp16 = pow(x = hidden_states_517_cast_fp16, y = var_8694_promoted_to_fp16)[name = string("op_8700_cast_fp16")]; tensor variance_173_axes_0 = const()[name = string("variance_173_axes_0"), val = tensor([-1])]; bool variance_173_keep_dims_0 = const()[name = string("variance_173_keep_dims_0"), val = bool(true)]; tensor variance_173_cast_fp16 = reduce_mean(axes = variance_173_axes_0, keep_dims = variance_173_keep_dims_0, x = var_8700_cast_fp16)[name = string("variance_173_cast_fp16")]; fp16 var_8703_to_fp16 = const()[name = string("op_8703_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8704_cast_fp16 = add(x = variance_173_cast_fp16, y = var_8703_to_fp16)[name = string("op_8704_cast_fp16")]; fp32 var_8705_epsilon_0 = const()[name = string("op_8705_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8705_cast_fp16 = rsqrt(epsilon = var_8705_epsilon_0, x = var_8704_cast_fp16)[name = string("op_8705_cast_fp16")]; tensor hidden_states_521_cast_fp16 = mul(x = hidden_states_517_cast_fp16, y = var_8705_cast_fp16)[name = string("hidden_states_521_cast_fp16")]; tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334709056)))]; tensor k_171_cast_fp16 = mul(x = const_213_to_fp16, y = hidden_states_521_cast_fp16)[name = string("k_171_cast_fp16")]; tensor q_173_perm_0 = const()[name = string("q_173_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_173_perm_0 = const()[name = string("k_173_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_131_perm_0 = const()[name = string("v_131_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_173_cast_fp16 = transpose(perm = q_173_perm_0, x = q_171_cast_fp16)[name = string("transpose_27")]; tensor var_8722_cast_fp16 = mul(x = q_173_cast_fp16, y = cos_3_cast_fp16)[name = string("op_8722_cast_fp16")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_173_cast_fp16)[name = string("x1_85_cast_fp16")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_173_cast_fp16)[name = string("x2_85_cast_fp16")]; fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8743_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_8743_cast_fp16")]; int32 var_8745 = const()[name = string("op_8745"), val = int32(-1)]; bool var_8746_interleave_0 = const()[name = string("op_8746_interleave_0"), val = bool(false)]; tensor var_8746_cast_fp16 = concat(axis = var_8745, interleave = var_8746_interleave_0, values = (var_8743_cast_fp16, x1_85_cast_fp16))[name = string("op_8746_cast_fp16")]; tensor var_8747_cast_fp16 = mul(x = var_8746_cast_fp16, y = sin_3_cast_fp16)[name = string("op_8747_cast_fp16")]; tensor q_175_cast_fp16 = add(x = var_8722_cast_fp16, y = var_8747_cast_fp16)[name = string("q_175_cast_fp16")]; tensor k_173_cast_fp16 = transpose(perm = k_173_perm_0, x = k_171_cast_fp16)[name = string("transpose_26")]; tensor var_8750_cast_fp16 = mul(x = k_173_cast_fp16, y = cos_3_cast_fp16)[name = string("op_8750_cast_fp16")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_173_cast_fp16)[name = string("x1_87_cast_fp16")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_173_cast_fp16)[name = string("x2_87_cast_fp16")]; fp16 const_219_promoted_to_fp16 = const()[name = string("const_219_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8771_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_219_promoted_to_fp16)[name = string("op_8771_cast_fp16")]; int32 var_8773 = const()[name = string("op_8773"), val = int32(-1)]; bool var_8774_interleave_0 = const()[name = string("op_8774_interleave_0"), val = bool(false)]; tensor var_8774_cast_fp16 = concat(axis = var_8773, interleave = var_8774_interleave_0, values = (var_8771_cast_fp16, x1_87_cast_fp16))[name = string("op_8774_cast_fp16")]; tensor var_8775_cast_fp16 = mul(x = var_8774_cast_fp16, y = sin_3_cast_fp16)[name = string("op_8775_cast_fp16")]; tensor k_175_cast_fp16 = add(x = var_8750_cast_fp16, y = var_8775_cast_fp16)[name = string("k_175_cast_fp16")]; tensor var_8782 = const()[name = string("op_8782"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_43_cast_fp16 = reshape(shape = var_8782, x = k_175_cast_fp16)[name = string("nk_flat_43_cast_fp16")]; tensor var_8788 = const()[name = string("op_8788"), val = tensor([1, 1024, 1, 1])]; tensor v_131_cast_fp16 = transpose(perm = v_131_perm_0, x = v_129_cast_fp16)[name = string("transpose_25")]; tensor nv_flat_43_cast_fp16 = reshape(shape = var_8788, x = v_131_cast_fp16)[name = string("nv_flat_43_cast_fp16")]; tensor var_8797_cast_fp16 = mul(x = var_8591_cast_fp16, y = var_1194_cast_fp16)[name = string("op_8797_cast_fp16")]; tensor var_8798_cast_fp16 = mul(x = nk_flat_43_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_8798_cast_fp16")]; tensor key_cache_89_cast_fp16 = add(x = var_8797_cast_fp16, y = var_8798_cast_fp16)[name = string("key_cache_89_cast_fp16")]; tensor var_8804_cast_fp16 = mul(x = var_8611_cast_fp16, y = var_1194_cast_fp16)[name = string("op_8804_cast_fp16")]; tensor var_8805_cast_fp16 = mul(x = nv_flat_43_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_8805_cast_fp16")]; tensor value_cache_89_cast_fp16 = add(x = var_8804_cast_fp16, y = var_8805_cast_fp16)[name = string("value_cache_89_cast_fp16")]; tensor kc_127_axes_0 = const()[name = string("kc_127_axes_0"), val = tensor([2])]; tensor kc_127_cast_fp16 = squeeze(axes = kc_127_axes_0, x = key_cache_89_cast_fp16)[name = string("kc_127_cast_fp16")]; tensor var_8814 = const()[name = string("op_8814"), val = tensor([1, 8, 128, 256])]; tensor kc_129_cast_fp16 = reshape(shape = var_8814, x = kc_127_cast_fp16)[name = string("kc_129_cast_fp16")]; tensor vc_127_axes_0 = const()[name = string("vc_127_axes_0"), val = tensor([2])]; tensor vc_127_cast_fp16 = squeeze(axes = vc_127_axes_0, x = value_cache_89_cast_fp16)[name = string("vc_127_cast_fp16")]; tensor var_8822 = const()[name = string("op_8822"), val = tensor([1, 8, 128, 256])]; tensor vc_129_cast_fp16 = reshape(shape = var_8822, x = vc_127_cast_fp16)[name = string("vc_129_cast_fp16")]; tensor var_8825_axes_0 = const()[name = string("op_8825_axes_0"), val = tensor([2])]; tensor var_8825_cast_fp16 = expand_dims(axes = var_8825_axes_0, x = kc_129_cast_fp16)[name = string("op_8825_cast_fp16")]; tensor var_8833_reps_0 = const()[name = string("op_8833_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8833_cast_fp16 = tile(reps = var_8833_reps_0, x = var_8825_cast_fp16)[name = string("op_8833_cast_fp16")]; tensor var_8838 = const()[name = string("op_8838"), val = tensor([1, 16, 128, 256])]; tensor kc_131_cast_fp16 = reshape(shape = var_8838, x = var_8833_cast_fp16)[name = string("kc_131_cast_fp16")]; tensor var_8841_axes_0 = const()[name = string("op_8841_axes_0"), val = tensor([2])]; tensor var_8841_cast_fp16 = expand_dims(axes = var_8841_axes_0, x = vc_129_cast_fp16)[name = string("op_8841_cast_fp16")]; tensor var_8849_reps_0 = const()[name = string("op_8849_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8849_cast_fp16 = tile(reps = var_8849_reps_0, x = var_8841_cast_fp16)[name = string("op_8849_cast_fp16")]; tensor var_8854 = const()[name = string("op_8854"), val = tensor([1, 16, 128, 256])]; tensor vc_131_cast_fp16 = reshape(shape = var_8854, x = var_8849_cast_fp16)[name = string("vc_131_cast_fp16")]; bool var_8856_transpose_x_0 = const()[name = string("op_8856_transpose_x_0"), val = bool(false)]; bool var_8856_transpose_y_0 = const()[name = string("op_8856_transpose_y_0"), val = bool(false)]; tensor var_8856_cast_fp16 = matmul(transpose_x = var_8856_transpose_x_0, transpose_y = var_8856_transpose_y_0, x = q_175_cast_fp16, y = kc_131_cast_fp16)[name = string("op_8856_cast_fp16")]; fp16 _inversed_attn_weights_169_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_169_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_169_cast_fp16 = mul(x = var_8856_cast_fp16, y = _inversed_attn_weights_169_y_0_to_fp16)[name = string("_inversed_attn_weights_169_cast_fp16")]; tensor attn_weights_171_cast_fp16 = add(x = _inversed_attn_weights_169_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_171_cast_fp16")]; int32 var_8870 = const()[name = string("op_8870"), val = int32(-1)]; tensor attn_weights_175_cast_fp16 = softmax(axis = var_8870, x = attn_weights_171_cast_fp16)[name = string("attn_weights_175_cast_fp16")]; bool attn_output_85_transpose_x_1 = const()[name = string("attn_output_85_transpose_x_1"), val = bool(false)]; bool attn_output_85_transpose_y_1 = const()[name = string("attn_output_85_transpose_y_1"), val = bool(true)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_1, transpose_y = attn_output_85_transpose_y_1, x = attn_weights_175_cast_fp16, y = vc_131_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_8879_perm_0 = const()[name = string("op_8879_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8883 = const()[name = string("op_8883"), val = tensor([1, 1, -1])]; tensor var_8879_cast_fp16 = transpose(perm = var_8879_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_24")]; tensor input_213_cast_fp16 = reshape(shape = var_8883, x = var_8879_cast_fp16)[name = string("input_213_cast_fp16")]; tensor layers_21_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334709376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336806592))))[name = string("layers_21_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_150_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_o_proj_weight_to_fp16_palettized, x = input_213_cast_fp16)[name = string("linear_150_cast_fp16")]; tensor var_8889_axes_0 = const()[name = string("op_8889_axes_0"), val = tensor([0])]; tensor var_8889_cast_fp16 = squeeze(axes = var_8889_axes_0, x = linear_150_cast_fp16)[name = string("op_8889_cast_fp16")]; tensor var_8891_axes_0 = const()[name = string("op_8891_axes_0"), val = tensor([0])]; tensor var_8891_cast_fp16 = squeeze(axes = var_8891_axes_0, x = var_8889_cast_fp16)[name = string("op_8891_cast_fp16")]; tensor var_8893_axes_0 = const()[name = string("op_8893_axes_0"), val = tensor([-1])]; tensor var_8893_cast_fp16 = expand_dims(axes = var_8893_axes_0, x = var_8891_cast_fp16)[name = string("op_8893_cast_fp16")]; tensor attn_4d_43_axes_0 = const()[name = string("attn_4d_43_axes_0"), val = tensor([-1])]; tensor attn_4d_43_cast_fp16 = expand_dims(axes = attn_4d_43_axes_0, x = var_8893_cast_fp16)[name = string("attn_4d_43_cast_fp16")]; tensor hidden_85_cast_fp16 = add(x = hidden_83_cast_fp16, y = attn_4d_43_cast_fp16)[name = string("hidden_85_cast_fp16")]; tensor var_8899_axes_0 = const()[name = string("op_8899_axes_0"), val = tensor([-1])]; tensor var_8899_cast_fp16 = squeeze(axes = var_8899_axes_0, x = hidden_85_cast_fp16)[name = string("op_8899_cast_fp16")]; tensor var_8901_axes_0 = const()[name = string("op_8901_axes_0"), val = tensor([-1])]; tensor var_8901_cast_fp16 = squeeze(axes = var_8901_axes_0, x = var_8899_cast_fp16)[name = string("op_8901_cast_fp16")]; tensor hidden_states_523_axes_0 = const()[name = string("hidden_states_523_axes_0"), val = tensor([0])]; tensor hidden_states_523_cast_fp16 = expand_dims(axes = hidden_states_523_axes_0, x = var_8901_cast_fp16)[name = string("hidden_states_523_cast_fp16")]; fp16 var_8907_promoted_to_fp16 = const()[name = string("op_8907_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8913_cast_fp16 = pow(x = hidden_states_523_cast_fp16, y = var_8907_promoted_to_fp16)[name = string("op_8913_cast_fp16")]; tensor variance_175_axes_0 = const()[name = string("variance_175_axes_0"), val = tensor([-1])]; bool variance_175_keep_dims_0 = const()[name = string("variance_175_keep_dims_0"), val = bool(true)]; tensor variance_175_cast_fp16 = reduce_mean(axes = variance_175_axes_0, keep_dims = variance_175_keep_dims_0, x = var_8913_cast_fp16)[name = string("variance_175_cast_fp16")]; fp16 var_8916_to_fp16 = const()[name = string("op_8916_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8917_cast_fp16 = add(x = variance_175_cast_fp16, y = var_8916_to_fp16)[name = string("op_8917_cast_fp16")]; fp32 var_8918_epsilon_0 = const()[name = string("op_8918_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8918_cast_fp16 = rsqrt(epsilon = var_8918_epsilon_0, x = var_8917_cast_fp16)[name = string("op_8918_cast_fp16")]; tensor hidden_states_527_cast_fp16 = mul(x = hidden_states_523_cast_fp16, y = var_8918_cast_fp16)[name = string("hidden_states_527_cast_fp16")]; tensor const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336807168)))]; tensor input_215_cast_fp16 = mul(x = const_220_to_fp16, y = hidden_states_527_cast_fp16)[name = string("input_215_cast_fp16")]; tensor layers_21_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336809280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339955072))))[name = string("layers_21_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_gate_proj_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor var_8928_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_8928_cast_fp16")]; tensor layers_21_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339955648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343101440))))[name = string("layers_21_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_up_proj_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor input_219_cast_fp16 = mul(x = var_8928_cast_fp16, y = linear_152_cast_fp16)[name = string("input_219_cast_fp16")]; tensor layers_21_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343102016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346247808))))[name = string("layers_21_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_mlp_down_proj_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor var_8935_axes_0 = const()[name = string("op_8935_axes_0"), val = tensor([0])]; tensor var_8935_cast_fp16 = squeeze(axes = var_8935_axes_0, x = linear_153_cast_fp16)[name = string("op_8935_cast_fp16")]; tensor var_8937_axes_0 = const()[name = string("op_8937_axes_0"), val = tensor([0])]; tensor var_8937_cast_fp16 = squeeze(axes = var_8937_axes_0, x = var_8935_cast_fp16)[name = string("op_8937_cast_fp16")]; tensor var_8939_axes_0 = const()[name = string("op_8939_axes_0"), val = tensor([-1])]; tensor var_8939_cast_fp16 = expand_dims(axes = var_8939_axes_0, x = var_8937_cast_fp16)[name = string("op_8939_cast_fp16")]; tensor mlp_4d_43_axes_0 = const()[name = string("mlp_4d_43_axes_0"), val = tensor([-1])]; tensor mlp_4d_43_cast_fp16 = expand_dims(axes = mlp_4d_43_axes_0, x = var_8939_cast_fp16)[name = string("mlp_4d_43_cast_fp16")]; tensor hidden_87_cast_fp16 = add(x = hidden_85_cast_fp16, y = mlp_4d_43_cast_fp16)[name = string("hidden_87_cast_fp16")]; tensor var_8953_begin_0 = const()[name = string("op_8953_begin_0"), val = tensor([0, 22528, 0, 0])]; tensor var_8953_end_0 = const()[name = string("op_8953_end_0"), val = tensor([1, 23552, 1, 256])]; tensor var_8953_end_mask_0 = const()[name = string("op_8953_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8953_cast_fp16 = slice_by_index(begin = var_8953_begin_0, end = var_8953_end_0, end_mask = var_8953_end_mask_0, x = key_cache)[name = string("op_8953_cast_fp16")]; tensor var_8973_begin_0 = const()[name = string("op_8973_begin_0"), val = tensor([0, 22528, 0, 0])]; tensor var_8973_end_0 = const()[name = string("op_8973_end_0"), val = tensor([1, 23552, 1, 256])]; tensor var_8973_end_mask_0 = const()[name = string("op_8973_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8973_cast_fp16 = slice_by_index(begin = var_8973_begin_0, end = var_8973_end_0, end_mask = var_8973_end_mask_0, x = value_cache)[name = string("op_8973_cast_fp16")]; tensor var_8985_axes_0 = const()[name = string("op_8985_axes_0"), val = tensor([-1])]; tensor var_8985_cast_fp16 = squeeze(axes = var_8985_axes_0, x = hidden_87_cast_fp16)[name = string("op_8985_cast_fp16")]; tensor var_8987_axes_0 = const()[name = string("op_8987_axes_0"), val = tensor([-1])]; tensor var_8987_cast_fp16 = squeeze(axes = var_8987_axes_0, x = var_8985_cast_fp16)[name = string("op_8987_cast_fp16")]; tensor hidden_states_529_axes_0 = const()[name = string("hidden_states_529_axes_0"), val = tensor([0])]; tensor hidden_states_529_cast_fp16 = expand_dims(axes = hidden_states_529_axes_0, x = var_8987_cast_fp16)[name = string("hidden_states_529_cast_fp16")]; fp16 var_8993_promoted_to_fp16 = const()[name = string("op_8993_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8999_cast_fp16 = pow(x = hidden_states_529_cast_fp16, y = var_8993_promoted_to_fp16)[name = string("op_8999_cast_fp16")]; tensor variance_177_axes_0 = const()[name = string("variance_177_axes_0"), val = tensor([-1])]; bool variance_177_keep_dims_0 = const()[name = string("variance_177_keep_dims_0"), val = bool(true)]; tensor variance_177_cast_fp16 = reduce_mean(axes = variance_177_axes_0, keep_dims = variance_177_keep_dims_0, x = var_8999_cast_fp16)[name = string("variance_177_cast_fp16")]; fp16 var_9002_to_fp16 = const()[name = string("op_9002_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9003_cast_fp16 = add(x = variance_177_cast_fp16, y = var_9002_to_fp16)[name = string("op_9003_cast_fp16")]; fp32 var_9004_epsilon_0 = const()[name = string("op_9004_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9004_cast_fp16 = rsqrt(epsilon = var_9004_epsilon_0, x = var_9003_cast_fp16)[name = string("op_9004_cast_fp16")]; tensor hidden_states_533_cast_fp16 = mul(x = hidden_states_529_cast_fp16, y = var_9004_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; tensor const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346248384)))]; tensor input_221_cast_fp16 = mul(x = const_221_to_fp16, y = hidden_states_533_cast_fp16)[name = string("input_221_cast_fp16")]; tensor layers_22_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346250496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348347712))))[name = string("layers_22_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_22_self_attn_q_proj_weight_to_fp16_palettized, x = input_221_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor layers_22_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348348288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349396928))))[name = string("layers_22_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_k_proj_weight_to_fp16_palettized, x = input_221_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor layers_22_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349397504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350446144))))[name = string("layers_22_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_v_proj_weight_to_fp16_palettized, x = input_221_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor var_9021 = const()[name = string("op_9021"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_535_cast_fp16 = reshape(shape = var_9021, x = linear_154_cast_fp16)[name = string("hidden_states_535_cast_fp16")]; tensor var_9027 = const()[name = string("op_9027"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_541_cast_fp16 = reshape(shape = var_9027, x = linear_155_cast_fp16)[name = string("hidden_states_541_cast_fp16")]; tensor var_9033 = const()[name = string("op_9033"), val = tensor([1, 1, 8, 128])]; tensor v_135_cast_fp16 = reshape(shape = var_9033, x = linear_156_cast_fp16)[name = string("v_135_cast_fp16")]; fp16 var_9038_promoted_to_fp16 = const()[name = string("op_9038_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9044_cast_fp16 = pow(x = hidden_states_535_cast_fp16, y = var_9038_promoted_to_fp16)[name = string("op_9044_cast_fp16")]; tensor variance_179_axes_0 = const()[name = string("variance_179_axes_0"), val = tensor([-1])]; bool variance_179_keep_dims_0 = const()[name = string("variance_179_keep_dims_0"), val = bool(true)]; tensor variance_179_cast_fp16 = reduce_mean(axes = variance_179_axes_0, keep_dims = variance_179_keep_dims_0, x = var_9044_cast_fp16)[name = string("variance_179_cast_fp16")]; fp16 var_9047_to_fp16 = const()[name = string("op_9047_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9048_cast_fp16 = add(x = variance_179_cast_fp16, y = var_9047_to_fp16)[name = string("op_9048_cast_fp16")]; fp32 var_9049_epsilon_0 = const()[name = string("op_9049_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9049_cast_fp16 = rsqrt(epsilon = var_9049_epsilon_0, x = var_9048_cast_fp16)[name = string("op_9049_cast_fp16")]; tensor hidden_states_539_cast_fp16 = mul(x = hidden_states_535_cast_fp16, y = var_9049_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; tensor const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350446720)))]; tensor q_179_cast_fp16 = mul(x = const_222_to_fp16, y = hidden_states_539_cast_fp16)[name = string("q_179_cast_fp16")]; fp16 var_9056_promoted_to_fp16 = const()[name = string("op_9056_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9062_cast_fp16 = pow(x = hidden_states_541_cast_fp16, y = var_9056_promoted_to_fp16)[name = string("op_9062_cast_fp16")]; tensor variance_181_axes_0 = const()[name = string("variance_181_axes_0"), val = tensor([-1])]; bool variance_181_keep_dims_0 = const()[name = string("variance_181_keep_dims_0"), val = bool(true)]; tensor variance_181_cast_fp16 = reduce_mean(axes = variance_181_axes_0, keep_dims = variance_181_keep_dims_0, x = var_9062_cast_fp16)[name = string("variance_181_cast_fp16")]; fp16 var_9065_to_fp16 = const()[name = string("op_9065_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9066_cast_fp16 = add(x = variance_181_cast_fp16, y = var_9065_to_fp16)[name = string("op_9066_cast_fp16")]; fp32 var_9067_epsilon_0 = const()[name = string("op_9067_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9067_cast_fp16 = rsqrt(epsilon = var_9067_epsilon_0, x = var_9066_cast_fp16)[name = string("op_9067_cast_fp16")]; tensor hidden_states_545_cast_fp16 = mul(x = hidden_states_541_cast_fp16, y = var_9067_cast_fp16)[name = string("hidden_states_545_cast_fp16")]; tensor const_223_to_fp16 = const()[name = string("const_223_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350447040)))]; tensor k_179_cast_fp16 = mul(x = const_223_to_fp16, y = hidden_states_545_cast_fp16)[name = string("k_179_cast_fp16")]; tensor q_181_perm_0 = const()[name = string("q_181_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_181_perm_0 = const()[name = string("k_181_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_137_perm_0 = const()[name = string("v_137_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_181_cast_fp16 = transpose(perm = q_181_perm_0, x = q_179_cast_fp16)[name = string("transpose_23")]; tensor var_9084_cast_fp16 = mul(x = q_181_cast_fp16, y = cos_3_cast_fp16)[name = string("op_9084_cast_fp16")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_181_cast_fp16)[name = string("x1_89_cast_fp16")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_181_cast_fp16)[name = string("x2_89_cast_fp16")]; fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9105_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_226_promoted_to_fp16)[name = string("op_9105_cast_fp16")]; int32 var_9107 = const()[name = string("op_9107"), val = int32(-1)]; bool var_9108_interleave_0 = const()[name = string("op_9108_interleave_0"), val = bool(false)]; tensor var_9108_cast_fp16 = concat(axis = var_9107, interleave = var_9108_interleave_0, values = (var_9105_cast_fp16, x1_89_cast_fp16))[name = string("op_9108_cast_fp16")]; tensor var_9109_cast_fp16 = mul(x = var_9108_cast_fp16, y = sin_3_cast_fp16)[name = string("op_9109_cast_fp16")]; tensor q_183_cast_fp16 = add(x = var_9084_cast_fp16, y = var_9109_cast_fp16)[name = string("q_183_cast_fp16")]; tensor k_181_cast_fp16 = transpose(perm = k_181_perm_0, x = k_179_cast_fp16)[name = string("transpose_22")]; tensor var_9112_cast_fp16 = mul(x = k_181_cast_fp16, y = cos_3_cast_fp16)[name = string("op_9112_cast_fp16")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_181_cast_fp16)[name = string("x1_91_cast_fp16")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_181_cast_fp16)[name = string("x2_91_cast_fp16")]; fp16 const_229_promoted_to_fp16 = const()[name = string("const_229_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9133_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_229_promoted_to_fp16)[name = string("op_9133_cast_fp16")]; int32 var_9135 = const()[name = string("op_9135"), val = int32(-1)]; bool var_9136_interleave_0 = const()[name = string("op_9136_interleave_0"), val = bool(false)]; tensor var_9136_cast_fp16 = concat(axis = var_9135, interleave = var_9136_interleave_0, values = (var_9133_cast_fp16, x1_91_cast_fp16))[name = string("op_9136_cast_fp16")]; tensor var_9137_cast_fp16 = mul(x = var_9136_cast_fp16, y = sin_3_cast_fp16)[name = string("op_9137_cast_fp16")]; tensor k_183_cast_fp16 = add(x = var_9112_cast_fp16, y = var_9137_cast_fp16)[name = string("k_183_cast_fp16")]; tensor var_9144 = const()[name = string("op_9144"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_45_cast_fp16 = reshape(shape = var_9144, x = k_183_cast_fp16)[name = string("nk_flat_45_cast_fp16")]; tensor var_9150 = const()[name = string("op_9150"), val = tensor([1, 1024, 1, 1])]; tensor v_137_cast_fp16 = transpose(perm = v_137_perm_0, x = v_135_cast_fp16)[name = string("transpose_21")]; tensor nv_flat_45_cast_fp16 = reshape(shape = var_9150, x = v_137_cast_fp16)[name = string("nv_flat_45_cast_fp16")]; tensor var_9159_cast_fp16 = mul(x = var_8953_cast_fp16, y = var_1194_cast_fp16)[name = string("op_9159_cast_fp16")]; tensor var_9160_cast_fp16 = mul(x = nk_flat_45_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_9160_cast_fp16")]; tensor key_cache_93_cast_fp16 = add(x = var_9159_cast_fp16, y = var_9160_cast_fp16)[name = string("key_cache_93_cast_fp16")]; tensor var_9166_cast_fp16 = mul(x = var_8973_cast_fp16, y = var_1194_cast_fp16)[name = string("op_9166_cast_fp16")]; tensor var_9167_cast_fp16 = mul(x = nv_flat_45_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_9167_cast_fp16")]; tensor value_cache_93_cast_fp16 = add(x = var_9166_cast_fp16, y = var_9167_cast_fp16)[name = string("value_cache_93_cast_fp16")]; tensor kc_133_axes_0 = const()[name = string("kc_133_axes_0"), val = tensor([2])]; tensor kc_133_cast_fp16 = squeeze(axes = kc_133_axes_0, x = key_cache_93_cast_fp16)[name = string("kc_133_cast_fp16")]; tensor var_9176 = const()[name = string("op_9176"), val = tensor([1, 8, 128, 256])]; tensor kc_135_cast_fp16 = reshape(shape = var_9176, x = kc_133_cast_fp16)[name = string("kc_135_cast_fp16")]; tensor vc_133_axes_0 = const()[name = string("vc_133_axes_0"), val = tensor([2])]; tensor vc_133_cast_fp16 = squeeze(axes = vc_133_axes_0, x = value_cache_93_cast_fp16)[name = string("vc_133_cast_fp16")]; tensor var_9184 = const()[name = string("op_9184"), val = tensor([1, 8, 128, 256])]; tensor vc_135_cast_fp16 = reshape(shape = var_9184, x = vc_133_cast_fp16)[name = string("vc_135_cast_fp16")]; tensor var_9187_axes_0 = const()[name = string("op_9187_axes_0"), val = tensor([2])]; tensor var_9187_cast_fp16 = expand_dims(axes = var_9187_axes_0, x = kc_135_cast_fp16)[name = string("op_9187_cast_fp16")]; tensor var_9195_reps_0 = const()[name = string("op_9195_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9195_cast_fp16 = tile(reps = var_9195_reps_0, x = var_9187_cast_fp16)[name = string("op_9195_cast_fp16")]; tensor var_9200 = const()[name = string("op_9200"), val = tensor([1, 16, 128, 256])]; tensor kc_137_cast_fp16 = reshape(shape = var_9200, x = var_9195_cast_fp16)[name = string("kc_137_cast_fp16")]; tensor var_9203_axes_0 = const()[name = string("op_9203_axes_0"), val = tensor([2])]; tensor var_9203_cast_fp16 = expand_dims(axes = var_9203_axes_0, x = vc_135_cast_fp16)[name = string("op_9203_cast_fp16")]; tensor var_9211_reps_0 = const()[name = string("op_9211_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9211_cast_fp16 = tile(reps = var_9211_reps_0, x = var_9203_cast_fp16)[name = string("op_9211_cast_fp16")]; tensor var_9216 = const()[name = string("op_9216"), val = tensor([1, 16, 128, 256])]; tensor vc_137_cast_fp16 = reshape(shape = var_9216, x = var_9211_cast_fp16)[name = string("vc_137_cast_fp16")]; bool var_9218_transpose_x_0 = const()[name = string("op_9218_transpose_x_0"), val = bool(false)]; bool var_9218_transpose_y_0 = const()[name = string("op_9218_transpose_y_0"), val = bool(false)]; tensor var_9218_cast_fp16 = matmul(transpose_x = var_9218_transpose_x_0, transpose_y = var_9218_transpose_y_0, x = q_183_cast_fp16, y = kc_137_cast_fp16)[name = string("op_9218_cast_fp16")]; fp16 _inversed_attn_weights_177_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_177_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_177_cast_fp16 = mul(x = var_9218_cast_fp16, y = _inversed_attn_weights_177_y_0_to_fp16)[name = string("_inversed_attn_weights_177_cast_fp16")]; tensor attn_weights_179_cast_fp16 = add(x = _inversed_attn_weights_177_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_179_cast_fp16")]; int32 var_9232 = const()[name = string("op_9232"), val = int32(-1)]; tensor attn_weights_183_cast_fp16 = softmax(axis = var_9232, x = attn_weights_179_cast_fp16)[name = string("attn_weights_183_cast_fp16")]; bool attn_output_89_transpose_x_1 = const()[name = string("attn_output_89_transpose_x_1"), val = bool(false)]; bool attn_output_89_transpose_y_1 = const()[name = string("attn_output_89_transpose_y_1"), val = bool(true)]; tensor attn_output_89_cast_fp16 = matmul(transpose_x = attn_output_89_transpose_x_1, transpose_y = attn_output_89_transpose_y_1, x = attn_weights_183_cast_fp16, y = vc_137_cast_fp16)[name = string("attn_output_89_cast_fp16")]; tensor var_9241_perm_0 = const()[name = string("op_9241_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9245 = const()[name = string("op_9245"), val = tensor([1, 1, -1])]; tensor var_9241_cast_fp16 = transpose(perm = var_9241_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_20")]; tensor input_223_cast_fp16 = reshape(shape = var_9245, x = var_9241_cast_fp16)[name = string("input_223_cast_fp16")]; tensor layers_22_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350447360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352544576))))[name = string("layers_22_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_o_proj_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor var_9251_axes_0 = const()[name = string("op_9251_axes_0"), val = tensor([0])]; tensor var_9251_cast_fp16 = squeeze(axes = var_9251_axes_0, x = linear_157_cast_fp16)[name = string("op_9251_cast_fp16")]; tensor var_9253_axes_0 = const()[name = string("op_9253_axes_0"), val = tensor([0])]; tensor var_9253_cast_fp16 = squeeze(axes = var_9253_axes_0, x = var_9251_cast_fp16)[name = string("op_9253_cast_fp16")]; tensor var_9255_axes_0 = const()[name = string("op_9255_axes_0"), val = tensor([-1])]; tensor var_9255_cast_fp16 = expand_dims(axes = var_9255_axes_0, x = var_9253_cast_fp16)[name = string("op_9255_cast_fp16")]; tensor attn_4d_45_axes_0 = const()[name = string("attn_4d_45_axes_0"), val = tensor([-1])]; tensor attn_4d_45_cast_fp16 = expand_dims(axes = attn_4d_45_axes_0, x = var_9255_cast_fp16)[name = string("attn_4d_45_cast_fp16")]; tensor hidden_89_cast_fp16 = add(x = hidden_87_cast_fp16, y = attn_4d_45_cast_fp16)[name = string("hidden_89_cast_fp16")]; tensor var_9261_axes_0 = const()[name = string("op_9261_axes_0"), val = tensor([-1])]; tensor var_9261_cast_fp16 = squeeze(axes = var_9261_axes_0, x = hidden_89_cast_fp16)[name = string("op_9261_cast_fp16")]; tensor var_9263_axes_0 = const()[name = string("op_9263_axes_0"), val = tensor([-1])]; tensor var_9263_cast_fp16 = squeeze(axes = var_9263_axes_0, x = var_9261_cast_fp16)[name = string("op_9263_cast_fp16")]; tensor hidden_states_547_axes_0 = const()[name = string("hidden_states_547_axes_0"), val = tensor([0])]; tensor hidden_states_547_cast_fp16 = expand_dims(axes = hidden_states_547_axes_0, x = var_9263_cast_fp16)[name = string("hidden_states_547_cast_fp16")]; fp16 var_9269_promoted_to_fp16 = const()[name = string("op_9269_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9275_cast_fp16 = pow(x = hidden_states_547_cast_fp16, y = var_9269_promoted_to_fp16)[name = string("op_9275_cast_fp16")]; tensor variance_183_axes_0 = const()[name = string("variance_183_axes_0"), val = tensor([-1])]; bool variance_183_keep_dims_0 = const()[name = string("variance_183_keep_dims_0"), val = bool(true)]; tensor variance_183_cast_fp16 = reduce_mean(axes = variance_183_axes_0, keep_dims = variance_183_keep_dims_0, x = var_9275_cast_fp16)[name = string("variance_183_cast_fp16")]; fp16 var_9278_to_fp16 = const()[name = string("op_9278_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9279_cast_fp16 = add(x = variance_183_cast_fp16, y = var_9278_to_fp16)[name = string("op_9279_cast_fp16")]; fp32 var_9280_epsilon_0 = const()[name = string("op_9280_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9280_cast_fp16 = rsqrt(epsilon = var_9280_epsilon_0, x = var_9279_cast_fp16)[name = string("op_9280_cast_fp16")]; tensor hidden_states_551_cast_fp16 = mul(x = hidden_states_547_cast_fp16, y = var_9280_cast_fp16)[name = string("hidden_states_551_cast_fp16")]; tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352545152)))]; tensor input_225_cast_fp16 = mul(x = const_230_to_fp16, y = hidden_states_551_cast_fp16)[name = string("input_225_cast_fp16")]; tensor layers_22_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352547264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355693056))))[name = string("layers_22_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_gate_proj_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = string("linear_158_cast_fp16")]; tensor var_9290_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_9290_cast_fp16")]; tensor layers_22_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355693632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358839424))))[name = string("layers_22_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_up_proj_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor input_229_cast_fp16 = mul(x = var_9290_cast_fp16, y = linear_159_cast_fp16)[name = string("input_229_cast_fp16")]; tensor layers_22_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358840000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361985792))))[name = string("layers_22_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_160_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_mlp_down_proj_weight_to_fp16_palettized, x = input_229_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor var_9297_axes_0 = const()[name = string("op_9297_axes_0"), val = tensor([0])]; tensor var_9297_cast_fp16 = squeeze(axes = var_9297_axes_0, x = linear_160_cast_fp16)[name = string("op_9297_cast_fp16")]; tensor var_9299_axes_0 = const()[name = string("op_9299_axes_0"), val = tensor([0])]; tensor var_9299_cast_fp16 = squeeze(axes = var_9299_axes_0, x = var_9297_cast_fp16)[name = string("op_9299_cast_fp16")]; tensor var_9301_axes_0 = const()[name = string("op_9301_axes_0"), val = tensor([-1])]; tensor var_9301_cast_fp16 = expand_dims(axes = var_9301_axes_0, x = var_9299_cast_fp16)[name = string("op_9301_cast_fp16")]; tensor mlp_4d_45_axes_0 = const()[name = string("mlp_4d_45_axes_0"), val = tensor([-1])]; tensor mlp_4d_45_cast_fp16 = expand_dims(axes = mlp_4d_45_axes_0, x = var_9301_cast_fp16)[name = string("mlp_4d_45_cast_fp16")]; tensor hidden_91_cast_fp16 = add(x = hidden_89_cast_fp16, y = mlp_4d_45_cast_fp16)[name = string("hidden_91_cast_fp16")]; tensor var_9315_begin_0 = const()[name = string("op_9315_begin_0"), val = tensor([0, 23552, 0, 0])]; tensor var_9315_end_0 = const()[name = string("op_9315_end_0"), val = tensor([1, 24576, 1, 256])]; tensor var_9315_end_mask_0 = const()[name = string("op_9315_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9315_cast_fp16 = slice_by_index(begin = var_9315_begin_0, end = var_9315_end_0, end_mask = var_9315_end_mask_0, x = key_cache)[name = string("op_9315_cast_fp16")]; tensor var_9335_begin_0 = const()[name = string("op_9335_begin_0"), val = tensor([0, 23552, 0, 0])]; tensor var_9335_end_0 = const()[name = string("op_9335_end_0"), val = tensor([1, 24576, 1, 256])]; tensor var_9335_end_mask_0 = const()[name = string("op_9335_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9335_cast_fp16 = slice_by_index(begin = var_9335_begin_0, end = var_9335_end_0, end_mask = var_9335_end_mask_0, x = value_cache)[name = string("op_9335_cast_fp16")]; tensor var_9347_axes_0 = const()[name = string("op_9347_axes_0"), val = tensor([-1])]; tensor var_9347_cast_fp16 = squeeze(axes = var_9347_axes_0, x = hidden_91_cast_fp16)[name = string("op_9347_cast_fp16")]; tensor var_9349_axes_0 = const()[name = string("op_9349_axes_0"), val = tensor([-1])]; tensor var_9349_cast_fp16 = squeeze(axes = var_9349_axes_0, x = var_9347_cast_fp16)[name = string("op_9349_cast_fp16")]; tensor hidden_states_553_axes_0 = const()[name = string("hidden_states_553_axes_0"), val = tensor([0])]; tensor hidden_states_553_cast_fp16 = expand_dims(axes = hidden_states_553_axes_0, x = var_9349_cast_fp16)[name = string("hidden_states_553_cast_fp16")]; fp16 var_9355_promoted_to_fp16 = const()[name = string("op_9355_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9361_cast_fp16 = pow(x = hidden_states_553_cast_fp16, y = var_9355_promoted_to_fp16)[name = string("op_9361_cast_fp16")]; tensor variance_185_axes_0 = const()[name = string("variance_185_axes_0"), val = tensor([-1])]; bool variance_185_keep_dims_0 = const()[name = string("variance_185_keep_dims_0"), val = bool(true)]; tensor variance_185_cast_fp16 = reduce_mean(axes = variance_185_axes_0, keep_dims = variance_185_keep_dims_0, x = var_9361_cast_fp16)[name = string("variance_185_cast_fp16")]; fp16 var_9364_to_fp16 = const()[name = string("op_9364_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9365_cast_fp16 = add(x = variance_185_cast_fp16, y = var_9364_to_fp16)[name = string("op_9365_cast_fp16")]; fp32 var_9366_epsilon_0 = const()[name = string("op_9366_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9366_cast_fp16 = rsqrt(epsilon = var_9366_epsilon_0, x = var_9365_cast_fp16)[name = string("op_9366_cast_fp16")]; tensor hidden_states_557_cast_fp16 = mul(x = hidden_states_553_cast_fp16, y = var_9366_cast_fp16)[name = string("hidden_states_557_cast_fp16")]; tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361986368)))]; tensor input_231_cast_fp16 = mul(x = const_231_to_fp16, y = hidden_states_557_cast_fp16)[name = string("input_231_cast_fp16")]; tensor layers_23_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361988480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364085696))))[name = string("layers_23_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_23_self_attn_q_proj_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor layers_23_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364086272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365134912))))[name = string("layers_23_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_k_proj_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor layers_23_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365135488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366184128))))[name = string("layers_23_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_v_proj_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor var_9383 = const()[name = string("op_9383"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_559_cast_fp16 = reshape(shape = var_9383, x = linear_161_cast_fp16)[name = string("hidden_states_559_cast_fp16")]; tensor var_9389 = const()[name = string("op_9389"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_565_cast_fp16 = reshape(shape = var_9389, x = linear_162_cast_fp16)[name = string("hidden_states_565_cast_fp16")]; tensor var_9395 = const()[name = string("op_9395"), val = tensor([1, 1, 8, 128])]; tensor v_141_cast_fp16 = reshape(shape = var_9395, x = linear_163_cast_fp16)[name = string("v_141_cast_fp16")]; fp16 var_9400_promoted_to_fp16 = const()[name = string("op_9400_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9406_cast_fp16 = pow(x = hidden_states_559_cast_fp16, y = var_9400_promoted_to_fp16)[name = string("op_9406_cast_fp16")]; tensor variance_187_axes_0 = const()[name = string("variance_187_axes_0"), val = tensor([-1])]; bool variance_187_keep_dims_0 = const()[name = string("variance_187_keep_dims_0"), val = bool(true)]; tensor variance_187_cast_fp16 = reduce_mean(axes = variance_187_axes_0, keep_dims = variance_187_keep_dims_0, x = var_9406_cast_fp16)[name = string("variance_187_cast_fp16")]; fp16 var_9409_to_fp16 = const()[name = string("op_9409_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9410_cast_fp16 = add(x = variance_187_cast_fp16, y = var_9409_to_fp16)[name = string("op_9410_cast_fp16")]; fp32 var_9411_epsilon_0 = const()[name = string("op_9411_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9411_cast_fp16 = rsqrt(epsilon = var_9411_epsilon_0, x = var_9410_cast_fp16)[name = string("op_9411_cast_fp16")]; tensor hidden_states_563_cast_fp16 = mul(x = hidden_states_559_cast_fp16, y = var_9411_cast_fp16)[name = string("hidden_states_563_cast_fp16")]; tensor const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366184704)))]; tensor q_187_cast_fp16 = mul(x = const_232_to_fp16, y = hidden_states_563_cast_fp16)[name = string("q_187_cast_fp16")]; fp16 var_9418_promoted_to_fp16 = const()[name = string("op_9418_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9424_cast_fp16 = pow(x = hidden_states_565_cast_fp16, y = var_9418_promoted_to_fp16)[name = string("op_9424_cast_fp16")]; tensor variance_189_axes_0 = const()[name = string("variance_189_axes_0"), val = tensor([-1])]; bool variance_189_keep_dims_0 = const()[name = string("variance_189_keep_dims_0"), val = bool(true)]; tensor variance_189_cast_fp16 = reduce_mean(axes = variance_189_axes_0, keep_dims = variance_189_keep_dims_0, x = var_9424_cast_fp16)[name = string("variance_189_cast_fp16")]; fp16 var_9427_to_fp16 = const()[name = string("op_9427_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9428_cast_fp16 = add(x = variance_189_cast_fp16, y = var_9427_to_fp16)[name = string("op_9428_cast_fp16")]; fp32 var_9429_epsilon_0 = const()[name = string("op_9429_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9429_cast_fp16 = rsqrt(epsilon = var_9429_epsilon_0, x = var_9428_cast_fp16)[name = string("op_9429_cast_fp16")]; tensor hidden_states_569_cast_fp16 = mul(x = hidden_states_565_cast_fp16, y = var_9429_cast_fp16)[name = string("hidden_states_569_cast_fp16")]; tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366185024)))]; tensor k_187_cast_fp16 = mul(x = const_233_to_fp16, y = hidden_states_569_cast_fp16)[name = string("k_187_cast_fp16")]; tensor q_189_perm_0 = const()[name = string("q_189_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_189_perm_0 = const()[name = string("k_189_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_143_perm_0 = const()[name = string("v_143_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_189_cast_fp16 = transpose(perm = q_189_perm_0, x = q_187_cast_fp16)[name = string("transpose_19")]; tensor var_9446_cast_fp16 = mul(x = q_189_cast_fp16, y = cos_3_cast_fp16)[name = string("op_9446_cast_fp16")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_189_cast_fp16)[name = string("x1_93_cast_fp16")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_189_cast_fp16)[name = string("x2_93_cast_fp16")]; fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9467_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_236_promoted_to_fp16)[name = string("op_9467_cast_fp16")]; int32 var_9469 = const()[name = string("op_9469"), val = int32(-1)]; bool var_9470_interleave_0 = const()[name = string("op_9470_interleave_0"), val = bool(false)]; tensor var_9470_cast_fp16 = concat(axis = var_9469, interleave = var_9470_interleave_0, values = (var_9467_cast_fp16, x1_93_cast_fp16))[name = string("op_9470_cast_fp16")]; tensor var_9471_cast_fp16 = mul(x = var_9470_cast_fp16, y = sin_3_cast_fp16)[name = string("op_9471_cast_fp16")]; tensor q_191_cast_fp16 = add(x = var_9446_cast_fp16, y = var_9471_cast_fp16)[name = string("q_191_cast_fp16")]; tensor k_189_cast_fp16 = transpose(perm = k_189_perm_0, x = k_187_cast_fp16)[name = string("transpose_18")]; tensor var_9474_cast_fp16 = mul(x = k_189_cast_fp16, y = cos_3_cast_fp16)[name = string("op_9474_cast_fp16")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_189_cast_fp16)[name = string("x1_95_cast_fp16")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_189_cast_fp16)[name = string("x2_95_cast_fp16")]; fp16 const_239_promoted_to_fp16 = const()[name = string("const_239_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9495_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_239_promoted_to_fp16)[name = string("op_9495_cast_fp16")]; int32 var_9497 = const()[name = string("op_9497"), val = int32(-1)]; bool var_9498_interleave_0 = const()[name = string("op_9498_interleave_0"), val = bool(false)]; tensor var_9498_cast_fp16 = concat(axis = var_9497, interleave = var_9498_interleave_0, values = (var_9495_cast_fp16, x1_95_cast_fp16))[name = string("op_9498_cast_fp16")]; tensor var_9499_cast_fp16 = mul(x = var_9498_cast_fp16, y = sin_3_cast_fp16)[name = string("op_9499_cast_fp16")]; tensor k_191_cast_fp16 = add(x = var_9474_cast_fp16, y = var_9499_cast_fp16)[name = string("k_191_cast_fp16")]; tensor var_9506 = const()[name = string("op_9506"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_47_cast_fp16 = reshape(shape = var_9506, x = k_191_cast_fp16)[name = string("nk_flat_47_cast_fp16")]; tensor var_9512 = const()[name = string("op_9512"), val = tensor([1, 1024, 1, 1])]; tensor v_143_cast_fp16 = transpose(perm = v_143_perm_0, x = v_141_cast_fp16)[name = string("transpose_17")]; tensor nv_flat_47_cast_fp16 = reshape(shape = var_9512, x = v_143_cast_fp16)[name = string("nv_flat_47_cast_fp16")]; tensor var_9521_cast_fp16 = mul(x = var_9315_cast_fp16, y = var_1194_cast_fp16)[name = string("op_9521_cast_fp16")]; tensor var_9522_cast_fp16 = mul(x = nk_flat_47_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_9522_cast_fp16")]; tensor key_cache_97_cast_fp16 = add(x = var_9521_cast_fp16, y = var_9522_cast_fp16)[name = string("key_cache_97_cast_fp16")]; tensor var_9528_cast_fp16 = mul(x = var_9335_cast_fp16, y = var_1194_cast_fp16)[name = string("op_9528_cast_fp16")]; tensor var_9529_cast_fp16 = mul(x = nv_flat_47_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_9529_cast_fp16")]; tensor value_cache_97_cast_fp16 = add(x = var_9528_cast_fp16, y = var_9529_cast_fp16)[name = string("value_cache_97_cast_fp16")]; tensor kc_139_axes_0 = const()[name = string("kc_139_axes_0"), val = tensor([2])]; tensor kc_139_cast_fp16 = squeeze(axes = kc_139_axes_0, x = key_cache_97_cast_fp16)[name = string("kc_139_cast_fp16")]; tensor var_9538 = const()[name = string("op_9538"), val = tensor([1, 8, 128, 256])]; tensor kc_141_cast_fp16 = reshape(shape = var_9538, x = kc_139_cast_fp16)[name = string("kc_141_cast_fp16")]; tensor vc_139_axes_0 = const()[name = string("vc_139_axes_0"), val = tensor([2])]; tensor vc_139_cast_fp16 = squeeze(axes = vc_139_axes_0, x = value_cache_97_cast_fp16)[name = string("vc_139_cast_fp16")]; tensor var_9546 = const()[name = string("op_9546"), val = tensor([1, 8, 128, 256])]; tensor vc_141_cast_fp16 = reshape(shape = var_9546, x = vc_139_cast_fp16)[name = string("vc_141_cast_fp16")]; tensor var_9549_axes_0 = const()[name = string("op_9549_axes_0"), val = tensor([2])]; tensor var_9549_cast_fp16 = expand_dims(axes = var_9549_axes_0, x = kc_141_cast_fp16)[name = string("op_9549_cast_fp16")]; tensor var_9557_reps_0 = const()[name = string("op_9557_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9557_cast_fp16 = tile(reps = var_9557_reps_0, x = var_9549_cast_fp16)[name = string("op_9557_cast_fp16")]; tensor var_9562 = const()[name = string("op_9562"), val = tensor([1, 16, 128, 256])]; tensor kc_143_cast_fp16 = reshape(shape = var_9562, x = var_9557_cast_fp16)[name = string("kc_143_cast_fp16")]; tensor var_9565_axes_0 = const()[name = string("op_9565_axes_0"), val = tensor([2])]; tensor var_9565_cast_fp16 = expand_dims(axes = var_9565_axes_0, x = vc_141_cast_fp16)[name = string("op_9565_cast_fp16")]; tensor var_9573_reps_0 = const()[name = string("op_9573_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9573_cast_fp16 = tile(reps = var_9573_reps_0, x = var_9565_cast_fp16)[name = string("op_9573_cast_fp16")]; tensor var_9578 = const()[name = string("op_9578"), val = tensor([1, 16, 128, 256])]; tensor vc_143_cast_fp16 = reshape(shape = var_9578, x = var_9573_cast_fp16)[name = string("vc_143_cast_fp16")]; bool var_9580_transpose_x_0 = const()[name = string("op_9580_transpose_x_0"), val = bool(false)]; bool var_9580_transpose_y_0 = const()[name = string("op_9580_transpose_y_0"), val = bool(false)]; tensor var_9580_cast_fp16 = matmul(transpose_x = var_9580_transpose_x_0, transpose_y = var_9580_transpose_y_0, x = q_191_cast_fp16, y = kc_143_cast_fp16)[name = string("op_9580_cast_fp16")]; fp16 _inversed_attn_weights_185_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_185_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_185_cast_fp16 = mul(x = var_9580_cast_fp16, y = _inversed_attn_weights_185_y_0_to_fp16)[name = string("_inversed_attn_weights_185_cast_fp16")]; tensor attn_weights_187_cast_fp16 = add(x = _inversed_attn_weights_185_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_187_cast_fp16")]; int32 var_9594 = const()[name = string("op_9594"), val = int32(-1)]; tensor attn_weights_191_cast_fp16 = softmax(axis = var_9594, x = attn_weights_187_cast_fp16)[name = string("attn_weights_191_cast_fp16")]; bool attn_output_93_transpose_x_1 = const()[name = string("attn_output_93_transpose_x_1"), val = bool(false)]; bool attn_output_93_transpose_y_1 = const()[name = string("attn_output_93_transpose_y_1"), val = bool(true)]; tensor attn_output_93_cast_fp16 = matmul(transpose_x = attn_output_93_transpose_x_1, transpose_y = attn_output_93_transpose_y_1, x = attn_weights_191_cast_fp16, y = vc_143_cast_fp16)[name = string("attn_output_93_cast_fp16")]; tensor var_9603_perm_0 = const()[name = string("op_9603_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9607 = const()[name = string("op_9607"), val = tensor([1, 1, -1])]; tensor var_9603_cast_fp16 = transpose(perm = var_9603_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_16")]; tensor input_233_cast_fp16 = reshape(shape = var_9607, x = var_9603_cast_fp16)[name = string("input_233_cast_fp16")]; tensor layers_23_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366185344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368282560))))[name = string("layers_23_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_164_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_o_proj_weight_to_fp16_palettized, x = input_233_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor var_9613_axes_0 = const()[name = string("op_9613_axes_0"), val = tensor([0])]; tensor var_9613_cast_fp16 = squeeze(axes = var_9613_axes_0, x = linear_164_cast_fp16)[name = string("op_9613_cast_fp16")]; tensor var_9615_axes_0 = const()[name = string("op_9615_axes_0"), val = tensor([0])]; tensor var_9615_cast_fp16 = squeeze(axes = var_9615_axes_0, x = var_9613_cast_fp16)[name = string("op_9615_cast_fp16")]; tensor var_9617_axes_0 = const()[name = string("op_9617_axes_0"), val = tensor([-1])]; tensor var_9617_cast_fp16 = expand_dims(axes = var_9617_axes_0, x = var_9615_cast_fp16)[name = string("op_9617_cast_fp16")]; tensor attn_4d_47_axes_0 = const()[name = string("attn_4d_47_axes_0"), val = tensor([-1])]; tensor attn_4d_47_cast_fp16 = expand_dims(axes = attn_4d_47_axes_0, x = var_9617_cast_fp16)[name = string("attn_4d_47_cast_fp16")]; tensor hidden_93_cast_fp16 = add(x = hidden_91_cast_fp16, y = attn_4d_47_cast_fp16)[name = string("hidden_93_cast_fp16")]; tensor var_9623_axes_0 = const()[name = string("op_9623_axes_0"), val = tensor([-1])]; tensor var_9623_cast_fp16 = squeeze(axes = var_9623_axes_0, x = hidden_93_cast_fp16)[name = string("op_9623_cast_fp16")]; tensor var_9625_axes_0 = const()[name = string("op_9625_axes_0"), val = tensor([-1])]; tensor var_9625_cast_fp16 = squeeze(axes = var_9625_axes_0, x = var_9623_cast_fp16)[name = string("op_9625_cast_fp16")]; tensor hidden_states_571_axes_0 = const()[name = string("hidden_states_571_axes_0"), val = tensor([0])]; tensor hidden_states_571_cast_fp16 = expand_dims(axes = hidden_states_571_axes_0, x = var_9625_cast_fp16)[name = string("hidden_states_571_cast_fp16")]; fp16 var_9631_promoted_to_fp16 = const()[name = string("op_9631_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9637_cast_fp16 = pow(x = hidden_states_571_cast_fp16, y = var_9631_promoted_to_fp16)[name = string("op_9637_cast_fp16")]; tensor variance_191_axes_0 = const()[name = string("variance_191_axes_0"), val = tensor([-1])]; bool variance_191_keep_dims_0 = const()[name = string("variance_191_keep_dims_0"), val = bool(true)]; tensor variance_191_cast_fp16 = reduce_mean(axes = variance_191_axes_0, keep_dims = variance_191_keep_dims_0, x = var_9637_cast_fp16)[name = string("variance_191_cast_fp16")]; fp16 var_9640_to_fp16 = const()[name = string("op_9640_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9641_cast_fp16 = add(x = variance_191_cast_fp16, y = var_9640_to_fp16)[name = string("op_9641_cast_fp16")]; fp32 var_9642_epsilon_0 = const()[name = string("op_9642_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9642_cast_fp16 = rsqrt(epsilon = var_9642_epsilon_0, x = var_9641_cast_fp16)[name = string("op_9642_cast_fp16")]; tensor hidden_states_575_cast_fp16 = mul(x = hidden_states_571_cast_fp16, y = var_9642_cast_fp16)[name = string("hidden_states_575_cast_fp16")]; tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368283136)))]; tensor input_235_cast_fp16 = mul(x = const_240_to_fp16, y = hidden_states_575_cast_fp16)[name = string("input_235_cast_fp16")]; tensor layers_23_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368285248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371431040))))[name = string("layers_23_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_gate_proj_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor var_9652_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_9652_cast_fp16")]; tensor layers_23_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371431616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374577408))))[name = string("layers_23_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_up_proj_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = string("linear_166_cast_fp16")]; tensor input_239_cast_fp16 = mul(x = var_9652_cast_fp16, y = linear_166_cast_fp16)[name = string("input_239_cast_fp16")]; tensor layers_23_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374577984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377723776))))[name = string("layers_23_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_167_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_mlp_down_proj_weight_to_fp16_palettized, x = input_239_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor var_9659_axes_0 = const()[name = string("op_9659_axes_0"), val = tensor([0])]; tensor var_9659_cast_fp16 = squeeze(axes = var_9659_axes_0, x = linear_167_cast_fp16)[name = string("op_9659_cast_fp16")]; tensor var_9661_axes_0 = const()[name = string("op_9661_axes_0"), val = tensor([0])]; tensor var_9661_cast_fp16 = squeeze(axes = var_9661_axes_0, x = var_9659_cast_fp16)[name = string("op_9661_cast_fp16")]; tensor var_9663_axes_0 = const()[name = string("op_9663_axes_0"), val = tensor([-1])]; tensor var_9663_cast_fp16 = expand_dims(axes = var_9663_axes_0, x = var_9661_cast_fp16)[name = string("op_9663_cast_fp16")]; tensor mlp_4d_47_axes_0 = const()[name = string("mlp_4d_47_axes_0"), val = tensor([-1])]; tensor mlp_4d_47_cast_fp16 = expand_dims(axes = mlp_4d_47_axes_0, x = var_9663_cast_fp16)[name = string("mlp_4d_47_cast_fp16")]; tensor hidden_95_cast_fp16 = add(x = hidden_93_cast_fp16, y = mlp_4d_47_cast_fp16)[name = string("hidden_95_cast_fp16")]; tensor var_9677_begin_0 = const()[name = string("op_9677_begin_0"), val = tensor([0, 24576, 0, 0])]; tensor var_9677_end_0 = const()[name = string("op_9677_end_0"), val = tensor([1, 25600, 1, 256])]; tensor var_9677_end_mask_0 = const()[name = string("op_9677_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9677_cast_fp16 = slice_by_index(begin = var_9677_begin_0, end = var_9677_end_0, end_mask = var_9677_end_mask_0, x = key_cache)[name = string("op_9677_cast_fp16")]; tensor var_9697_begin_0 = const()[name = string("op_9697_begin_0"), val = tensor([0, 24576, 0, 0])]; tensor var_9697_end_0 = const()[name = string("op_9697_end_0"), val = tensor([1, 25600, 1, 256])]; tensor var_9697_end_mask_0 = const()[name = string("op_9697_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9697_cast_fp16 = slice_by_index(begin = var_9697_begin_0, end = var_9697_end_0, end_mask = var_9697_end_mask_0, x = value_cache)[name = string("op_9697_cast_fp16")]; tensor var_9709_axes_0 = const()[name = string("op_9709_axes_0"), val = tensor([-1])]; tensor var_9709_cast_fp16 = squeeze(axes = var_9709_axes_0, x = hidden_95_cast_fp16)[name = string("op_9709_cast_fp16")]; tensor var_9711_axes_0 = const()[name = string("op_9711_axes_0"), val = tensor([-1])]; tensor var_9711_cast_fp16 = squeeze(axes = var_9711_axes_0, x = var_9709_cast_fp16)[name = string("op_9711_cast_fp16")]; tensor hidden_states_577_axes_0 = const()[name = string("hidden_states_577_axes_0"), val = tensor([0])]; tensor hidden_states_577_cast_fp16 = expand_dims(axes = hidden_states_577_axes_0, x = var_9711_cast_fp16)[name = string("hidden_states_577_cast_fp16")]; fp16 var_9717_promoted_to_fp16 = const()[name = string("op_9717_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9723_cast_fp16 = pow(x = hidden_states_577_cast_fp16, y = var_9717_promoted_to_fp16)[name = string("op_9723_cast_fp16")]; tensor variance_193_axes_0 = const()[name = string("variance_193_axes_0"), val = tensor([-1])]; bool variance_193_keep_dims_0 = const()[name = string("variance_193_keep_dims_0"), val = bool(true)]; tensor variance_193_cast_fp16 = reduce_mean(axes = variance_193_axes_0, keep_dims = variance_193_keep_dims_0, x = var_9723_cast_fp16)[name = string("variance_193_cast_fp16")]; fp16 var_9726_to_fp16 = const()[name = string("op_9726_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9727_cast_fp16 = add(x = variance_193_cast_fp16, y = var_9726_to_fp16)[name = string("op_9727_cast_fp16")]; fp32 var_9728_epsilon_0 = const()[name = string("op_9728_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9728_cast_fp16 = rsqrt(epsilon = var_9728_epsilon_0, x = var_9727_cast_fp16)[name = string("op_9728_cast_fp16")]; tensor hidden_states_581_cast_fp16 = mul(x = hidden_states_577_cast_fp16, y = var_9728_cast_fp16)[name = string("hidden_states_581_cast_fp16")]; tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377724352)))]; tensor input_241_cast_fp16 = mul(x = const_241_to_fp16, y = hidden_states_581_cast_fp16)[name = string("input_241_cast_fp16")]; tensor layers_24_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377726464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379823680))))[name = string("layers_24_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_24_self_attn_q_proj_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor layers_24_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379824256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380872896))))[name = string("layers_24_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_k_proj_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor layers_24_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381922112))))[name = string("layers_24_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_v_proj_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("linear_170_cast_fp16")]; tensor var_9745 = const()[name = string("op_9745"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_583_cast_fp16 = reshape(shape = var_9745, x = linear_168_cast_fp16)[name = string("hidden_states_583_cast_fp16")]; tensor var_9751 = const()[name = string("op_9751"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_589_cast_fp16 = reshape(shape = var_9751, x = linear_169_cast_fp16)[name = string("hidden_states_589_cast_fp16")]; tensor var_9757 = const()[name = string("op_9757"), val = tensor([1, 1, 8, 128])]; tensor v_147_cast_fp16 = reshape(shape = var_9757, x = linear_170_cast_fp16)[name = string("v_147_cast_fp16")]; fp16 var_9762_promoted_to_fp16 = const()[name = string("op_9762_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9768_cast_fp16 = pow(x = hidden_states_583_cast_fp16, y = var_9762_promoted_to_fp16)[name = string("op_9768_cast_fp16")]; tensor variance_195_axes_0 = const()[name = string("variance_195_axes_0"), val = tensor([-1])]; bool variance_195_keep_dims_0 = const()[name = string("variance_195_keep_dims_0"), val = bool(true)]; tensor variance_195_cast_fp16 = reduce_mean(axes = variance_195_axes_0, keep_dims = variance_195_keep_dims_0, x = var_9768_cast_fp16)[name = string("variance_195_cast_fp16")]; fp16 var_9771_to_fp16 = const()[name = string("op_9771_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9772_cast_fp16 = add(x = variance_195_cast_fp16, y = var_9771_to_fp16)[name = string("op_9772_cast_fp16")]; fp32 var_9773_epsilon_0 = const()[name = string("op_9773_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9773_cast_fp16 = rsqrt(epsilon = var_9773_epsilon_0, x = var_9772_cast_fp16)[name = string("op_9773_cast_fp16")]; tensor hidden_states_587_cast_fp16 = mul(x = hidden_states_583_cast_fp16, y = var_9773_cast_fp16)[name = string("hidden_states_587_cast_fp16")]; tensor const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381922688)))]; tensor q_195_cast_fp16 = mul(x = const_242_to_fp16, y = hidden_states_587_cast_fp16)[name = string("q_195_cast_fp16")]; fp16 var_9780_promoted_to_fp16 = const()[name = string("op_9780_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9786_cast_fp16 = pow(x = hidden_states_589_cast_fp16, y = var_9780_promoted_to_fp16)[name = string("op_9786_cast_fp16")]; tensor variance_197_axes_0 = const()[name = string("variance_197_axes_0"), val = tensor([-1])]; bool variance_197_keep_dims_0 = const()[name = string("variance_197_keep_dims_0"), val = bool(true)]; tensor variance_197_cast_fp16 = reduce_mean(axes = variance_197_axes_0, keep_dims = variance_197_keep_dims_0, x = var_9786_cast_fp16)[name = string("variance_197_cast_fp16")]; fp16 var_9789_to_fp16 = const()[name = string("op_9789_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9790_cast_fp16 = add(x = variance_197_cast_fp16, y = var_9789_to_fp16)[name = string("op_9790_cast_fp16")]; fp32 var_9791_epsilon_0 = const()[name = string("op_9791_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9791_cast_fp16 = rsqrt(epsilon = var_9791_epsilon_0, x = var_9790_cast_fp16)[name = string("op_9791_cast_fp16")]; tensor hidden_states_593_cast_fp16 = mul(x = hidden_states_589_cast_fp16, y = var_9791_cast_fp16)[name = string("hidden_states_593_cast_fp16")]; tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381923008)))]; tensor k_195_cast_fp16 = mul(x = const_243_to_fp16, y = hidden_states_593_cast_fp16)[name = string("k_195_cast_fp16")]; tensor q_197_perm_0 = const()[name = string("q_197_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_197_perm_0 = const()[name = string("k_197_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_149_perm_0 = const()[name = string("v_149_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_197_cast_fp16 = transpose(perm = q_197_perm_0, x = q_195_cast_fp16)[name = string("transpose_15")]; tensor var_9808_cast_fp16 = mul(x = q_197_cast_fp16, y = cos_3_cast_fp16)[name = string("op_9808_cast_fp16")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_197_cast_fp16)[name = string("x1_97_cast_fp16")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_197_cast_fp16)[name = string("x2_97_cast_fp16")]; fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9829_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_246_promoted_to_fp16)[name = string("op_9829_cast_fp16")]; int32 var_9831 = const()[name = string("op_9831"), val = int32(-1)]; bool var_9832_interleave_0 = const()[name = string("op_9832_interleave_0"), val = bool(false)]; tensor var_9832_cast_fp16 = concat(axis = var_9831, interleave = var_9832_interleave_0, values = (var_9829_cast_fp16, x1_97_cast_fp16))[name = string("op_9832_cast_fp16")]; tensor var_9833_cast_fp16 = mul(x = var_9832_cast_fp16, y = sin_3_cast_fp16)[name = string("op_9833_cast_fp16")]; tensor q_199_cast_fp16 = add(x = var_9808_cast_fp16, y = var_9833_cast_fp16)[name = string("q_199_cast_fp16")]; tensor k_197_cast_fp16 = transpose(perm = k_197_perm_0, x = k_195_cast_fp16)[name = string("transpose_14")]; tensor var_9836_cast_fp16 = mul(x = k_197_cast_fp16, y = cos_3_cast_fp16)[name = string("op_9836_cast_fp16")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_197_cast_fp16)[name = string("x1_99_cast_fp16")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_197_cast_fp16)[name = string("x2_99_cast_fp16")]; fp16 const_249_promoted_to_fp16 = const()[name = string("const_249_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9857_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_249_promoted_to_fp16)[name = string("op_9857_cast_fp16")]; int32 var_9859 = const()[name = string("op_9859"), val = int32(-1)]; bool var_9860_interleave_0 = const()[name = string("op_9860_interleave_0"), val = bool(false)]; tensor var_9860_cast_fp16 = concat(axis = var_9859, interleave = var_9860_interleave_0, values = (var_9857_cast_fp16, x1_99_cast_fp16))[name = string("op_9860_cast_fp16")]; tensor var_9861_cast_fp16 = mul(x = var_9860_cast_fp16, y = sin_3_cast_fp16)[name = string("op_9861_cast_fp16")]; tensor k_199_cast_fp16 = add(x = var_9836_cast_fp16, y = var_9861_cast_fp16)[name = string("k_199_cast_fp16")]; tensor var_9868 = const()[name = string("op_9868"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_49_cast_fp16 = reshape(shape = var_9868, x = k_199_cast_fp16)[name = string("nk_flat_49_cast_fp16")]; tensor var_9874 = const()[name = string("op_9874"), val = tensor([1, 1024, 1, 1])]; tensor v_149_cast_fp16 = transpose(perm = v_149_perm_0, x = v_147_cast_fp16)[name = string("transpose_13")]; tensor nv_flat_49_cast_fp16 = reshape(shape = var_9874, x = v_149_cast_fp16)[name = string("nv_flat_49_cast_fp16")]; tensor var_9883_cast_fp16 = mul(x = var_9677_cast_fp16, y = var_1194_cast_fp16)[name = string("op_9883_cast_fp16")]; tensor var_9884_cast_fp16 = mul(x = nk_flat_49_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_9884_cast_fp16")]; tensor key_cache_101_cast_fp16 = add(x = var_9883_cast_fp16, y = var_9884_cast_fp16)[name = string("key_cache_101_cast_fp16")]; tensor var_9890_cast_fp16 = mul(x = var_9697_cast_fp16, y = var_1194_cast_fp16)[name = string("op_9890_cast_fp16")]; tensor var_9891_cast_fp16 = mul(x = nv_flat_49_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_9891_cast_fp16")]; tensor value_cache_101_cast_fp16 = add(x = var_9890_cast_fp16, y = var_9891_cast_fp16)[name = string("value_cache_101_cast_fp16")]; tensor kc_145_axes_0 = const()[name = string("kc_145_axes_0"), val = tensor([2])]; tensor kc_145_cast_fp16 = squeeze(axes = kc_145_axes_0, x = key_cache_101_cast_fp16)[name = string("kc_145_cast_fp16")]; tensor var_9900 = const()[name = string("op_9900"), val = tensor([1, 8, 128, 256])]; tensor kc_147_cast_fp16 = reshape(shape = var_9900, x = kc_145_cast_fp16)[name = string("kc_147_cast_fp16")]; tensor vc_145_axes_0 = const()[name = string("vc_145_axes_0"), val = tensor([2])]; tensor vc_145_cast_fp16 = squeeze(axes = vc_145_axes_0, x = value_cache_101_cast_fp16)[name = string("vc_145_cast_fp16")]; tensor var_9908 = const()[name = string("op_9908"), val = tensor([1, 8, 128, 256])]; tensor vc_147_cast_fp16 = reshape(shape = var_9908, x = vc_145_cast_fp16)[name = string("vc_147_cast_fp16")]; tensor var_9911_axes_0 = const()[name = string("op_9911_axes_0"), val = tensor([2])]; tensor var_9911_cast_fp16 = expand_dims(axes = var_9911_axes_0, x = kc_147_cast_fp16)[name = string("op_9911_cast_fp16")]; tensor var_9919_reps_0 = const()[name = string("op_9919_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9919_cast_fp16 = tile(reps = var_9919_reps_0, x = var_9911_cast_fp16)[name = string("op_9919_cast_fp16")]; tensor var_9924 = const()[name = string("op_9924"), val = tensor([1, 16, 128, 256])]; tensor kc_149_cast_fp16 = reshape(shape = var_9924, x = var_9919_cast_fp16)[name = string("kc_149_cast_fp16")]; tensor var_9927_axes_0 = const()[name = string("op_9927_axes_0"), val = tensor([2])]; tensor var_9927_cast_fp16 = expand_dims(axes = var_9927_axes_0, x = vc_147_cast_fp16)[name = string("op_9927_cast_fp16")]; tensor var_9935_reps_0 = const()[name = string("op_9935_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9935_cast_fp16 = tile(reps = var_9935_reps_0, x = var_9927_cast_fp16)[name = string("op_9935_cast_fp16")]; tensor var_9940 = const()[name = string("op_9940"), val = tensor([1, 16, 128, 256])]; tensor vc_149_cast_fp16 = reshape(shape = var_9940, x = var_9935_cast_fp16)[name = string("vc_149_cast_fp16")]; bool var_9942_transpose_x_0 = const()[name = string("op_9942_transpose_x_0"), val = bool(false)]; bool var_9942_transpose_y_0 = const()[name = string("op_9942_transpose_y_0"), val = bool(false)]; tensor var_9942_cast_fp16 = matmul(transpose_x = var_9942_transpose_x_0, transpose_y = var_9942_transpose_y_0, x = q_199_cast_fp16, y = kc_149_cast_fp16)[name = string("op_9942_cast_fp16")]; fp16 _inversed_attn_weights_193_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_193_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_193_cast_fp16 = mul(x = var_9942_cast_fp16, y = _inversed_attn_weights_193_y_0_to_fp16)[name = string("_inversed_attn_weights_193_cast_fp16")]; tensor attn_weights_195_cast_fp16 = add(x = _inversed_attn_weights_193_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_195_cast_fp16")]; int32 var_9956 = const()[name = string("op_9956"), val = int32(-1)]; tensor attn_weights_199_cast_fp16 = softmax(axis = var_9956, x = attn_weights_195_cast_fp16)[name = string("attn_weights_199_cast_fp16")]; bool attn_output_97_transpose_x_1 = const()[name = string("attn_output_97_transpose_x_1"), val = bool(false)]; bool attn_output_97_transpose_y_1 = const()[name = string("attn_output_97_transpose_y_1"), val = bool(true)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_1, transpose_y = attn_output_97_transpose_y_1, x = attn_weights_199_cast_fp16, y = vc_149_cast_fp16)[name = string("attn_output_97_cast_fp16")]; tensor var_9965_perm_0 = const()[name = string("op_9965_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9969 = const()[name = string("op_9969"), val = tensor([1, 1, -1])]; tensor var_9965_cast_fp16 = transpose(perm = var_9965_perm_0, x = attn_output_97_cast_fp16)[name = string("transpose_12")]; tensor input_243_cast_fp16 = reshape(shape = var_9969, x = var_9965_cast_fp16)[name = string("input_243_cast_fp16")]; tensor layers_24_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381923328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384020544))))[name = string("layers_24_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_171_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_o_proj_weight_to_fp16_palettized, x = input_243_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor var_9975_axes_0 = const()[name = string("op_9975_axes_0"), val = tensor([0])]; tensor var_9975_cast_fp16 = squeeze(axes = var_9975_axes_0, x = linear_171_cast_fp16)[name = string("op_9975_cast_fp16")]; tensor var_9977_axes_0 = const()[name = string("op_9977_axes_0"), val = tensor([0])]; tensor var_9977_cast_fp16 = squeeze(axes = var_9977_axes_0, x = var_9975_cast_fp16)[name = string("op_9977_cast_fp16")]; tensor var_9979_axes_0 = const()[name = string("op_9979_axes_0"), val = tensor([-1])]; tensor var_9979_cast_fp16 = expand_dims(axes = var_9979_axes_0, x = var_9977_cast_fp16)[name = string("op_9979_cast_fp16")]; tensor attn_4d_49_axes_0 = const()[name = string("attn_4d_49_axes_0"), val = tensor([-1])]; tensor attn_4d_49_cast_fp16 = expand_dims(axes = attn_4d_49_axes_0, x = var_9979_cast_fp16)[name = string("attn_4d_49_cast_fp16")]; tensor hidden_97_cast_fp16 = add(x = hidden_95_cast_fp16, y = attn_4d_49_cast_fp16)[name = string("hidden_97_cast_fp16")]; tensor var_9985_axes_0 = const()[name = string("op_9985_axes_0"), val = tensor([-1])]; tensor var_9985_cast_fp16 = squeeze(axes = var_9985_axes_0, x = hidden_97_cast_fp16)[name = string("op_9985_cast_fp16")]; tensor var_9987_axes_0 = const()[name = string("op_9987_axes_0"), val = tensor([-1])]; tensor var_9987_cast_fp16 = squeeze(axes = var_9987_axes_0, x = var_9985_cast_fp16)[name = string("op_9987_cast_fp16")]; tensor hidden_states_595_axes_0 = const()[name = string("hidden_states_595_axes_0"), val = tensor([0])]; tensor hidden_states_595_cast_fp16 = expand_dims(axes = hidden_states_595_axes_0, x = var_9987_cast_fp16)[name = string("hidden_states_595_cast_fp16")]; fp16 var_9993_promoted_to_fp16 = const()[name = string("op_9993_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9999_cast_fp16 = pow(x = hidden_states_595_cast_fp16, y = var_9993_promoted_to_fp16)[name = string("op_9999_cast_fp16")]; tensor variance_199_axes_0 = const()[name = string("variance_199_axes_0"), val = tensor([-1])]; bool variance_199_keep_dims_0 = const()[name = string("variance_199_keep_dims_0"), val = bool(true)]; tensor variance_199_cast_fp16 = reduce_mean(axes = variance_199_axes_0, keep_dims = variance_199_keep_dims_0, x = var_9999_cast_fp16)[name = string("variance_199_cast_fp16")]; fp16 var_10002_to_fp16 = const()[name = string("op_10002_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10003_cast_fp16 = add(x = variance_199_cast_fp16, y = var_10002_to_fp16)[name = string("op_10003_cast_fp16")]; fp32 var_10004_epsilon_0 = const()[name = string("op_10004_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10004_cast_fp16 = rsqrt(epsilon = var_10004_epsilon_0, x = var_10003_cast_fp16)[name = string("op_10004_cast_fp16")]; tensor hidden_states_599_cast_fp16 = mul(x = hidden_states_595_cast_fp16, y = var_10004_cast_fp16)[name = string("hidden_states_599_cast_fp16")]; tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384021120)))]; tensor input_245_cast_fp16 = mul(x = const_250_to_fp16, y = hidden_states_599_cast_fp16)[name = string("input_245_cast_fp16")]; tensor layers_24_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384023232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387169024))))[name = string("layers_24_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_gate_proj_weight_to_fp16_palettized, x = input_245_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor var_10014_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_10014_cast_fp16")]; tensor layers_24_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387169600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390315392))))[name = string("layers_24_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_up_proj_weight_to_fp16_palettized, x = input_245_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor input_249_cast_fp16 = mul(x = var_10014_cast_fp16, y = linear_173_cast_fp16)[name = string("input_249_cast_fp16")]; tensor layers_24_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390315968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393461760))))[name = string("layers_24_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_174_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_mlp_down_proj_weight_to_fp16_palettized, x = input_249_cast_fp16)[name = string("linear_174_cast_fp16")]; tensor var_10021_axes_0 = const()[name = string("op_10021_axes_0"), val = tensor([0])]; tensor var_10021_cast_fp16 = squeeze(axes = var_10021_axes_0, x = linear_174_cast_fp16)[name = string("op_10021_cast_fp16")]; tensor var_10023_axes_0 = const()[name = string("op_10023_axes_0"), val = tensor([0])]; tensor var_10023_cast_fp16 = squeeze(axes = var_10023_axes_0, x = var_10021_cast_fp16)[name = string("op_10023_cast_fp16")]; tensor var_10025_axes_0 = const()[name = string("op_10025_axes_0"), val = tensor([-1])]; tensor var_10025_cast_fp16 = expand_dims(axes = var_10025_axes_0, x = var_10023_cast_fp16)[name = string("op_10025_cast_fp16")]; tensor mlp_4d_49_axes_0 = const()[name = string("mlp_4d_49_axes_0"), val = tensor([-1])]; tensor mlp_4d_49_cast_fp16 = expand_dims(axes = mlp_4d_49_axes_0, x = var_10025_cast_fp16)[name = string("mlp_4d_49_cast_fp16")]; tensor hidden_99_cast_fp16 = add(x = hidden_97_cast_fp16, y = mlp_4d_49_cast_fp16)[name = string("hidden_99_cast_fp16")]; tensor var_10039_begin_0 = const()[name = string("op_10039_begin_0"), val = tensor([0, 25600, 0, 0])]; tensor var_10039_end_0 = const()[name = string("op_10039_end_0"), val = tensor([1, 26624, 1, 256])]; tensor var_10039_end_mask_0 = const()[name = string("op_10039_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10039_cast_fp16 = slice_by_index(begin = var_10039_begin_0, end = var_10039_end_0, end_mask = var_10039_end_mask_0, x = key_cache)[name = string("op_10039_cast_fp16")]; tensor var_10059_begin_0 = const()[name = string("op_10059_begin_0"), val = tensor([0, 25600, 0, 0])]; tensor var_10059_end_0 = const()[name = string("op_10059_end_0"), val = tensor([1, 26624, 1, 256])]; tensor var_10059_end_mask_0 = const()[name = string("op_10059_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10059_cast_fp16 = slice_by_index(begin = var_10059_begin_0, end = var_10059_end_0, end_mask = var_10059_end_mask_0, x = value_cache)[name = string("op_10059_cast_fp16")]; tensor var_10071_axes_0 = const()[name = string("op_10071_axes_0"), val = tensor([-1])]; tensor var_10071_cast_fp16 = squeeze(axes = var_10071_axes_0, x = hidden_99_cast_fp16)[name = string("op_10071_cast_fp16")]; tensor var_10073_axes_0 = const()[name = string("op_10073_axes_0"), val = tensor([-1])]; tensor var_10073_cast_fp16 = squeeze(axes = var_10073_axes_0, x = var_10071_cast_fp16)[name = string("op_10073_cast_fp16")]; tensor hidden_states_601_axes_0 = const()[name = string("hidden_states_601_axes_0"), val = tensor([0])]; tensor hidden_states_601_cast_fp16 = expand_dims(axes = hidden_states_601_axes_0, x = var_10073_cast_fp16)[name = string("hidden_states_601_cast_fp16")]; fp16 var_10079_promoted_to_fp16 = const()[name = string("op_10079_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10085_cast_fp16 = pow(x = hidden_states_601_cast_fp16, y = var_10079_promoted_to_fp16)[name = string("op_10085_cast_fp16")]; tensor variance_201_axes_0 = const()[name = string("variance_201_axes_0"), val = tensor([-1])]; bool variance_201_keep_dims_0 = const()[name = string("variance_201_keep_dims_0"), val = bool(true)]; tensor variance_201_cast_fp16 = reduce_mean(axes = variance_201_axes_0, keep_dims = variance_201_keep_dims_0, x = var_10085_cast_fp16)[name = string("variance_201_cast_fp16")]; fp16 var_10088_to_fp16 = const()[name = string("op_10088_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10089_cast_fp16 = add(x = variance_201_cast_fp16, y = var_10088_to_fp16)[name = string("op_10089_cast_fp16")]; fp32 var_10090_epsilon_0 = const()[name = string("op_10090_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10090_cast_fp16 = rsqrt(epsilon = var_10090_epsilon_0, x = var_10089_cast_fp16)[name = string("op_10090_cast_fp16")]; tensor hidden_states_605_cast_fp16 = mul(x = hidden_states_601_cast_fp16, y = var_10090_cast_fp16)[name = string("hidden_states_605_cast_fp16")]; tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393462336)))]; tensor input_251_cast_fp16 = mul(x = const_251_to_fp16, y = hidden_states_605_cast_fp16)[name = string("input_251_cast_fp16")]; tensor layers_25_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393464448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395561664))))[name = string("layers_25_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_25_self_attn_q_proj_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor layers_25_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395562240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396610880))))[name = string("layers_25_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_k_proj_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor layers_25_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396611456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660096))))[name = string("layers_25_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_v_proj_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_177_cast_fp16")]; tensor var_10107 = const()[name = string("op_10107"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_607_cast_fp16 = reshape(shape = var_10107, x = linear_175_cast_fp16)[name = string("hidden_states_607_cast_fp16")]; tensor var_10113 = const()[name = string("op_10113"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_613_cast_fp16 = reshape(shape = var_10113, x = linear_176_cast_fp16)[name = string("hidden_states_613_cast_fp16")]; tensor var_10119 = const()[name = string("op_10119"), val = tensor([1, 1, 8, 128])]; tensor v_153_cast_fp16 = reshape(shape = var_10119, x = linear_177_cast_fp16)[name = string("v_153_cast_fp16")]; fp16 var_10124_promoted_to_fp16 = const()[name = string("op_10124_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10130_cast_fp16 = pow(x = hidden_states_607_cast_fp16, y = var_10124_promoted_to_fp16)[name = string("op_10130_cast_fp16")]; tensor variance_203_axes_0 = const()[name = string("variance_203_axes_0"), val = tensor([-1])]; bool variance_203_keep_dims_0 = const()[name = string("variance_203_keep_dims_0"), val = bool(true)]; tensor variance_203_cast_fp16 = reduce_mean(axes = variance_203_axes_0, keep_dims = variance_203_keep_dims_0, x = var_10130_cast_fp16)[name = string("variance_203_cast_fp16")]; fp16 var_10133_to_fp16 = const()[name = string("op_10133_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10134_cast_fp16 = add(x = variance_203_cast_fp16, y = var_10133_to_fp16)[name = string("op_10134_cast_fp16")]; fp32 var_10135_epsilon_0 = const()[name = string("op_10135_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10135_cast_fp16 = rsqrt(epsilon = var_10135_epsilon_0, x = var_10134_cast_fp16)[name = string("op_10135_cast_fp16")]; tensor hidden_states_611_cast_fp16 = mul(x = hidden_states_607_cast_fp16, y = var_10135_cast_fp16)[name = string("hidden_states_611_cast_fp16")]; tensor const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660672)))]; tensor q_203_cast_fp16 = mul(x = const_252_to_fp16, y = hidden_states_611_cast_fp16)[name = string("q_203_cast_fp16")]; fp16 var_10142_promoted_to_fp16 = const()[name = string("op_10142_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10148_cast_fp16 = pow(x = hidden_states_613_cast_fp16, y = var_10142_promoted_to_fp16)[name = string("op_10148_cast_fp16")]; tensor variance_205_axes_0 = const()[name = string("variance_205_axes_0"), val = tensor([-1])]; bool variance_205_keep_dims_0 = const()[name = string("variance_205_keep_dims_0"), val = bool(true)]; tensor variance_205_cast_fp16 = reduce_mean(axes = variance_205_axes_0, keep_dims = variance_205_keep_dims_0, x = var_10148_cast_fp16)[name = string("variance_205_cast_fp16")]; fp16 var_10151_to_fp16 = const()[name = string("op_10151_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10152_cast_fp16 = add(x = variance_205_cast_fp16, y = var_10151_to_fp16)[name = string("op_10152_cast_fp16")]; fp32 var_10153_epsilon_0 = const()[name = string("op_10153_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10153_cast_fp16 = rsqrt(epsilon = var_10153_epsilon_0, x = var_10152_cast_fp16)[name = string("op_10153_cast_fp16")]; tensor hidden_states_617_cast_fp16 = mul(x = hidden_states_613_cast_fp16, y = var_10153_cast_fp16)[name = string("hidden_states_617_cast_fp16")]; tensor const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660992)))]; tensor k_203_cast_fp16 = mul(x = const_253_to_fp16, y = hidden_states_617_cast_fp16)[name = string("k_203_cast_fp16")]; tensor q_205_perm_0 = const()[name = string("q_205_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_205_perm_0 = const()[name = string("k_205_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_155_perm_0 = const()[name = string("v_155_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_205_cast_fp16 = transpose(perm = q_205_perm_0, x = q_203_cast_fp16)[name = string("transpose_11")]; tensor var_10170_cast_fp16 = mul(x = q_205_cast_fp16, y = cos_3_cast_fp16)[name = string("op_10170_cast_fp16")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_205_cast_fp16)[name = string("x1_101_cast_fp16")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_205_cast_fp16)[name = string("x2_101_cast_fp16")]; fp16 const_256_promoted_to_fp16 = const()[name = string("const_256_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10191_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_256_promoted_to_fp16)[name = string("op_10191_cast_fp16")]; int32 var_10193 = const()[name = string("op_10193"), val = int32(-1)]; bool var_10194_interleave_0 = const()[name = string("op_10194_interleave_0"), val = bool(false)]; tensor var_10194_cast_fp16 = concat(axis = var_10193, interleave = var_10194_interleave_0, values = (var_10191_cast_fp16, x1_101_cast_fp16))[name = string("op_10194_cast_fp16")]; tensor var_10195_cast_fp16 = mul(x = var_10194_cast_fp16, y = sin_3_cast_fp16)[name = string("op_10195_cast_fp16")]; tensor q_207_cast_fp16 = add(x = var_10170_cast_fp16, y = var_10195_cast_fp16)[name = string("q_207_cast_fp16")]; tensor k_205_cast_fp16 = transpose(perm = k_205_perm_0, x = k_203_cast_fp16)[name = string("transpose_10")]; tensor var_10198_cast_fp16 = mul(x = k_205_cast_fp16, y = cos_3_cast_fp16)[name = string("op_10198_cast_fp16")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_205_cast_fp16)[name = string("x1_103_cast_fp16")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_205_cast_fp16)[name = string("x2_103_cast_fp16")]; fp16 const_259_promoted_to_fp16 = const()[name = string("const_259_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10219_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_259_promoted_to_fp16)[name = string("op_10219_cast_fp16")]; int32 var_10221 = const()[name = string("op_10221"), val = int32(-1)]; bool var_10222_interleave_0 = const()[name = string("op_10222_interleave_0"), val = bool(false)]; tensor var_10222_cast_fp16 = concat(axis = var_10221, interleave = var_10222_interleave_0, values = (var_10219_cast_fp16, x1_103_cast_fp16))[name = string("op_10222_cast_fp16")]; tensor var_10223_cast_fp16 = mul(x = var_10222_cast_fp16, y = sin_3_cast_fp16)[name = string("op_10223_cast_fp16")]; tensor k_207_cast_fp16 = add(x = var_10198_cast_fp16, y = var_10223_cast_fp16)[name = string("k_207_cast_fp16")]; tensor var_10230 = const()[name = string("op_10230"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_51_cast_fp16 = reshape(shape = var_10230, x = k_207_cast_fp16)[name = string("nk_flat_51_cast_fp16")]; tensor var_10236 = const()[name = string("op_10236"), val = tensor([1, 1024, 1, 1])]; tensor v_155_cast_fp16 = transpose(perm = v_155_perm_0, x = v_153_cast_fp16)[name = string("transpose_9")]; tensor nv_flat_51_cast_fp16 = reshape(shape = var_10236, x = v_155_cast_fp16)[name = string("nv_flat_51_cast_fp16")]; tensor var_10245_cast_fp16 = mul(x = var_10039_cast_fp16, y = var_1194_cast_fp16)[name = string("op_10245_cast_fp16")]; tensor var_10246_cast_fp16 = mul(x = nk_flat_51_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_10246_cast_fp16")]; tensor key_cache_105_cast_fp16 = add(x = var_10245_cast_fp16, y = var_10246_cast_fp16)[name = string("key_cache_105_cast_fp16")]; tensor var_10252_cast_fp16 = mul(x = var_10059_cast_fp16, y = var_1194_cast_fp16)[name = string("op_10252_cast_fp16")]; tensor var_10253_cast_fp16 = mul(x = nv_flat_51_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_10253_cast_fp16")]; tensor value_cache_105_cast_fp16 = add(x = var_10252_cast_fp16, y = var_10253_cast_fp16)[name = string("value_cache_105_cast_fp16")]; tensor kc_151_axes_0 = const()[name = string("kc_151_axes_0"), val = tensor([2])]; tensor kc_151_cast_fp16 = squeeze(axes = kc_151_axes_0, x = key_cache_105_cast_fp16)[name = string("kc_151_cast_fp16")]; tensor var_10262 = const()[name = string("op_10262"), val = tensor([1, 8, 128, 256])]; tensor kc_153_cast_fp16 = reshape(shape = var_10262, x = kc_151_cast_fp16)[name = string("kc_153_cast_fp16")]; tensor vc_151_axes_0 = const()[name = string("vc_151_axes_0"), val = tensor([2])]; tensor vc_151_cast_fp16 = squeeze(axes = vc_151_axes_0, x = value_cache_105_cast_fp16)[name = string("vc_151_cast_fp16")]; tensor var_10270 = const()[name = string("op_10270"), val = tensor([1, 8, 128, 256])]; tensor vc_153_cast_fp16 = reshape(shape = var_10270, x = vc_151_cast_fp16)[name = string("vc_153_cast_fp16")]; tensor var_10273_axes_0 = const()[name = string("op_10273_axes_0"), val = tensor([2])]; tensor var_10273_cast_fp16 = expand_dims(axes = var_10273_axes_0, x = kc_153_cast_fp16)[name = string("op_10273_cast_fp16")]; tensor var_10281_reps_0 = const()[name = string("op_10281_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10281_cast_fp16 = tile(reps = var_10281_reps_0, x = var_10273_cast_fp16)[name = string("op_10281_cast_fp16")]; tensor var_10286 = const()[name = string("op_10286"), val = tensor([1, 16, 128, 256])]; tensor kc_155_cast_fp16 = reshape(shape = var_10286, x = var_10281_cast_fp16)[name = string("kc_155_cast_fp16")]; tensor var_10289_axes_0 = const()[name = string("op_10289_axes_0"), val = tensor([2])]; tensor var_10289_cast_fp16 = expand_dims(axes = var_10289_axes_0, x = vc_153_cast_fp16)[name = string("op_10289_cast_fp16")]; tensor var_10297_reps_0 = const()[name = string("op_10297_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10297_cast_fp16 = tile(reps = var_10297_reps_0, x = var_10289_cast_fp16)[name = string("op_10297_cast_fp16")]; tensor var_10302 = const()[name = string("op_10302"), val = tensor([1, 16, 128, 256])]; tensor vc_155_cast_fp16 = reshape(shape = var_10302, x = var_10297_cast_fp16)[name = string("vc_155_cast_fp16")]; bool var_10304_transpose_x_0 = const()[name = string("op_10304_transpose_x_0"), val = bool(false)]; bool var_10304_transpose_y_0 = const()[name = string("op_10304_transpose_y_0"), val = bool(false)]; tensor var_10304_cast_fp16 = matmul(transpose_x = var_10304_transpose_x_0, transpose_y = var_10304_transpose_y_0, x = q_207_cast_fp16, y = kc_155_cast_fp16)[name = string("op_10304_cast_fp16")]; fp16 _inversed_attn_weights_201_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_201_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_201_cast_fp16 = mul(x = var_10304_cast_fp16, y = _inversed_attn_weights_201_y_0_to_fp16)[name = string("_inversed_attn_weights_201_cast_fp16")]; tensor attn_weights_203_cast_fp16 = add(x = _inversed_attn_weights_201_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_203_cast_fp16")]; int32 var_10318 = const()[name = string("op_10318"), val = int32(-1)]; tensor attn_weights_207_cast_fp16 = softmax(axis = var_10318, x = attn_weights_203_cast_fp16)[name = string("attn_weights_207_cast_fp16")]; bool attn_output_101_transpose_x_1 = const()[name = string("attn_output_101_transpose_x_1"), val = bool(false)]; bool attn_output_101_transpose_y_1 = const()[name = string("attn_output_101_transpose_y_1"), val = bool(true)]; tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_1, transpose_y = attn_output_101_transpose_y_1, x = attn_weights_207_cast_fp16, y = vc_155_cast_fp16)[name = string("attn_output_101_cast_fp16")]; tensor var_10327_perm_0 = const()[name = string("op_10327_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10331 = const()[name = string("op_10331"), val = tensor([1, 1, -1])]; tensor var_10327_cast_fp16 = transpose(perm = var_10327_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_8")]; tensor input_253_cast_fp16 = reshape(shape = var_10331, x = var_10327_cast_fp16)[name = string("input_253_cast_fp16")]; tensor layers_25_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397661312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399758528))))[name = string("layers_25_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_178_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_o_proj_weight_to_fp16_palettized, x = input_253_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor var_10337_axes_0 = const()[name = string("op_10337_axes_0"), val = tensor([0])]; tensor var_10337_cast_fp16 = squeeze(axes = var_10337_axes_0, x = linear_178_cast_fp16)[name = string("op_10337_cast_fp16")]; tensor var_10339_axes_0 = const()[name = string("op_10339_axes_0"), val = tensor([0])]; tensor var_10339_cast_fp16 = squeeze(axes = var_10339_axes_0, x = var_10337_cast_fp16)[name = string("op_10339_cast_fp16")]; tensor var_10341_axes_0 = const()[name = string("op_10341_axes_0"), val = tensor([-1])]; tensor var_10341_cast_fp16 = expand_dims(axes = var_10341_axes_0, x = var_10339_cast_fp16)[name = string("op_10341_cast_fp16")]; tensor attn_4d_51_axes_0 = const()[name = string("attn_4d_51_axes_0"), val = tensor([-1])]; tensor attn_4d_51_cast_fp16 = expand_dims(axes = attn_4d_51_axes_0, x = var_10341_cast_fp16)[name = string("attn_4d_51_cast_fp16")]; tensor hidden_101_cast_fp16 = add(x = hidden_99_cast_fp16, y = attn_4d_51_cast_fp16)[name = string("hidden_101_cast_fp16")]; tensor var_10347_axes_0 = const()[name = string("op_10347_axes_0"), val = tensor([-1])]; tensor var_10347_cast_fp16 = squeeze(axes = var_10347_axes_0, x = hidden_101_cast_fp16)[name = string("op_10347_cast_fp16")]; tensor var_10349_axes_0 = const()[name = string("op_10349_axes_0"), val = tensor([-1])]; tensor var_10349_cast_fp16 = squeeze(axes = var_10349_axes_0, x = var_10347_cast_fp16)[name = string("op_10349_cast_fp16")]; tensor hidden_states_619_axes_0 = const()[name = string("hidden_states_619_axes_0"), val = tensor([0])]; tensor hidden_states_619_cast_fp16 = expand_dims(axes = hidden_states_619_axes_0, x = var_10349_cast_fp16)[name = string("hidden_states_619_cast_fp16")]; fp16 var_10355_promoted_to_fp16 = const()[name = string("op_10355_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10361_cast_fp16 = pow(x = hidden_states_619_cast_fp16, y = var_10355_promoted_to_fp16)[name = string("op_10361_cast_fp16")]; tensor variance_207_axes_0 = const()[name = string("variance_207_axes_0"), val = tensor([-1])]; bool variance_207_keep_dims_0 = const()[name = string("variance_207_keep_dims_0"), val = bool(true)]; tensor variance_207_cast_fp16 = reduce_mean(axes = variance_207_axes_0, keep_dims = variance_207_keep_dims_0, x = var_10361_cast_fp16)[name = string("variance_207_cast_fp16")]; fp16 var_10364_to_fp16 = const()[name = string("op_10364_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10365_cast_fp16 = add(x = variance_207_cast_fp16, y = var_10364_to_fp16)[name = string("op_10365_cast_fp16")]; fp32 var_10366_epsilon_0 = const()[name = string("op_10366_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10366_cast_fp16 = rsqrt(epsilon = var_10366_epsilon_0, x = var_10365_cast_fp16)[name = string("op_10366_cast_fp16")]; tensor hidden_states_623_cast_fp16 = mul(x = hidden_states_619_cast_fp16, y = var_10366_cast_fp16)[name = string("hidden_states_623_cast_fp16")]; tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399759104)))]; tensor input_255_cast_fp16 = mul(x = const_260_to_fp16, y = hidden_states_623_cast_fp16)[name = string("input_255_cast_fp16")]; tensor layers_25_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399761216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402907008))))[name = string("layers_25_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_gate_proj_weight_to_fp16_palettized, x = input_255_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor var_10376_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_10376_cast_fp16")]; tensor layers_25_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406053376))))[name = string("layers_25_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_up_proj_weight_to_fp16_palettized, x = input_255_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor input_259_cast_fp16 = mul(x = var_10376_cast_fp16, y = linear_180_cast_fp16)[name = string("input_259_cast_fp16")]; tensor layers_25_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406053952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409199744))))[name = string("layers_25_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_mlp_down_proj_weight_to_fp16_palettized, x = input_259_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor var_10383_axes_0 = const()[name = string("op_10383_axes_0"), val = tensor([0])]; tensor var_10383_cast_fp16 = squeeze(axes = var_10383_axes_0, x = linear_181_cast_fp16)[name = string("op_10383_cast_fp16")]; tensor var_10385_axes_0 = const()[name = string("op_10385_axes_0"), val = tensor([0])]; tensor var_10385_cast_fp16 = squeeze(axes = var_10385_axes_0, x = var_10383_cast_fp16)[name = string("op_10385_cast_fp16")]; tensor var_10387_axes_0 = const()[name = string("op_10387_axes_0"), val = tensor([-1])]; tensor var_10387_cast_fp16 = expand_dims(axes = var_10387_axes_0, x = var_10385_cast_fp16)[name = string("op_10387_cast_fp16")]; tensor mlp_4d_51_axes_0 = const()[name = string("mlp_4d_51_axes_0"), val = tensor([-1])]; tensor mlp_4d_51_cast_fp16 = expand_dims(axes = mlp_4d_51_axes_0, x = var_10387_cast_fp16)[name = string("mlp_4d_51_cast_fp16")]; tensor hidden_103_cast_fp16 = add(x = hidden_101_cast_fp16, y = mlp_4d_51_cast_fp16)[name = string("hidden_103_cast_fp16")]; tensor var_10401_begin_0 = const()[name = string("op_10401_begin_0"), val = tensor([0, 26624, 0, 0])]; tensor var_10401_end_0 = const()[name = string("op_10401_end_0"), val = tensor([1, 27648, 1, 256])]; tensor var_10401_end_mask_0 = const()[name = string("op_10401_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10401_cast_fp16 = slice_by_index(begin = var_10401_begin_0, end = var_10401_end_0, end_mask = var_10401_end_mask_0, x = key_cache)[name = string("op_10401_cast_fp16")]; tensor var_10421_begin_0 = const()[name = string("op_10421_begin_0"), val = tensor([0, 26624, 0, 0])]; tensor var_10421_end_0 = const()[name = string("op_10421_end_0"), val = tensor([1, 27648, 1, 256])]; tensor var_10421_end_mask_0 = const()[name = string("op_10421_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10421_cast_fp16 = slice_by_index(begin = var_10421_begin_0, end = var_10421_end_0, end_mask = var_10421_end_mask_0, x = value_cache)[name = string("op_10421_cast_fp16")]; tensor var_10433_axes_0 = const()[name = string("op_10433_axes_0"), val = tensor([-1])]; tensor var_10433_cast_fp16 = squeeze(axes = var_10433_axes_0, x = hidden_103_cast_fp16)[name = string("op_10433_cast_fp16")]; tensor var_10435_axes_0 = const()[name = string("op_10435_axes_0"), val = tensor([-1])]; tensor var_10435_cast_fp16 = squeeze(axes = var_10435_axes_0, x = var_10433_cast_fp16)[name = string("op_10435_cast_fp16")]; tensor hidden_states_625_axes_0 = const()[name = string("hidden_states_625_axes_0"), val = tensor([0])]; tensor hidden_states_625_cast_fp16 = expand_dims(axes = hidden_states_625_axes_0, x = var_10435_cast_fp16)[name = string("hidden_states_625_cast_fp16")]; fp16 var_10441_promoted_to_fp16 = const()[name = string("op_10441_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10447_cast_fp16 = pow(x = hidden_states_625_cast_fp16, y = var_10441_promoted_to_fp16)[name = string("op_10447_cast_fp16")]; tensor variance_209_axes_0 = const()[name = string("variance_209_axes_0"), val = tensor([-1])]; bool variance_209_keep_dims_0 = const()[name = string("variance_209_keep_dims_0"), val = bool(true)]; tensor variance_209_cast_fp16 = reduce_mean(axes = variance_209_axes_0, keep_dims = variance_209_keep_dims_0, x = var_10447_cast_fp16)[name = string("variance_209_cast_fp16")]; fp16 var_10450_to_fp16 = const()[name = string("op_10450_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10451_cast_fp16 = add(x = variance_209_cast_fp16, y = var_10450_to_fp16)[name = string("op_10451_cast_fp16")]; fp32 var_10452_epsilon_0 = const()[name = string("op_10452_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10452_cast_fp16 = rsqrt(epsilon = var_10452_epsilon_0, x = var_10451_cast_fp16)[name = string("op_10452_cast_fp16")]; tensor hidden_states_629_cast_fp16 = mul(x = hidden_states_625_cast_fp16, y = var_10452_cast_fp16)[name = string("hidden_states_629_cast_fp16")]; tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409200320)))]; tensor input_261_cast_fp16 = mul(x = const_261_to_fp16, y = hidden_states_629_cast_fp16)[name = string("input_261_cast_fp16")]; tensor layers_26_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409202432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411299648))))[name = string("layers_26_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_26_self_attn_q_proj_weight_to_fp16_palettized, x = input_261_cast_fp16)[name = string("linear_182_cast_fp16")]; tensor layers_26_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411300224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412348864))))[name = string("layers_26_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_k_proj_weight_to_fp16_palettized, x = input_261_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor layers_26_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412349440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398080))))[name = string("layers_26_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_v_proj_weight_to_fp16_palettized, x = input_261_cast_fp16)[name = string("linear_184_cast_fp16")]; tensor var_10469 = const()[name = string("op_10469"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_631_cast_fp16 = reshape(shape = var_10469, x = linear_182_cast_fp16)[name = string("hidden_states_631_cast_fp16")]; tensor var_10475 = const()[name = string("op_10475"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_637_cast_fp16 = reshape(shape = var_10475, x = linear_183_cast_fp16)[name = string("hidden_states_637_cast_fp16")]; tensor var_10481 = const()[name = string("op_10481"), val = tensor([1, 1, 8, 128])]; tensor v_159_cast_fp16 = reshape(shape = var_10481, x = linear_184_cast_fp16)[name = string("v_159_cast_fp16")]; fp16 var_10486_promoted_to_fp16 = const()[name = string("op_10486_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10492_cast_fp16 = pow(x = hidden_states_631_cast_fp16, y = var_10486_promoted_to_fp16)[name = string("op_10492_cast_fp16")]; tensor variance_211_axes_0 = const()[name = string("variance_211_axes_0"), val = tensor([-1])]; bool variance_211_keep_dims_0 = const()[name = string("variance_211_keep_dims_0"), val = bool(true)]; tensor variance_211_cast_fp16 = reduce_mean(axes = variance_211_axes_0, keep_dims = variance_211_keep_dims_0, x = var_10492_cast_fp16)[name = string("variance_211_cast_fp16")]; fp16 var_10495_to_fp16 = const()[name = string("op_10495_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10496_cast_fp16 = add(x = variance_211_cast_fp16, y = var_10495_to_fp16)[name = string("op_10496_cast_fp16")]; fp32 var_10497_epsilon_0 = const()[name = string("op_10497_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10497_cast_fp16 = rsqrt(epsilon = var_10497_epsilon_0, x = var_10496_cast_fp16)[name = string("op_10497_cast_fp16")]; tensor hidden_states_635_cast_fp16 = mul(x = hidden_states_631_cast_fp16, y = var_10497_cast_fp16)[name = string("hidden_states_635_cast_fp16")]; tensor const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398656)))]; tensor q_211_cast_fp16 = mul(x = const_262_to_fp16, y = hidden_states_635_cast_fp16)[name = string("q_211_cast_fp16")]; fp16 var_10504_promoted_to_fp16 = const()[name = string("op_10504_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10510_cast_fp16 = pow(x = hidden_states_637_cast_fp16, y = var_10504_promoted_to_fp16)[name = string("op_10510_cast_fp16")]; tensor variance_213_axes_0 = const()[name = string("variance_213_axes_0"), val = tensor([-1])]; bool variance_213_keep_dims_0 = const()[name = string("variance_213_keep_dims_0"), val = bool(true)]; tensor variance_213_cast_fp16 = reduce_mean(axes = variance_213_axes_0, keep_dims = variance_213_keep_dims_0, x = var_10510_cast_fp16)[name = string("variance_213_cast_fp16")]; fp16 var_10513_to_fp16 = const()[name = string("op_10513_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10514_cast_fp16 = add(x = variance_213_cast_fp16, y = var_10513_to_fp16)[name = string("op_10514_cast_fp16")]; fp32 var_10515_epsilon_0 = const()[name = string("op_10515_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10515_cast_fp16 = rsqrt(epsilon = var_10515_epsilon_0, x = var_10514_cast_fp16)[name = string("op_10515_cast_fp16")]; tensor hidden_states_641_cast_fp16 = mul(x = hidden_states_637_cast_fp16, y = var_10515_cast_fp16)[name = string("hidden_states_641_cast_fp16")]; tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398976)))]; tensor k_211_cast_fp16 = mul(x = const_263_to_fp16, y = hidden_states_641_cast_fp16)[name = string("k_211_cast_fp16")]; tensor q_213_perm_0 = const()[name = string("q_213_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_213_perm_0 = const()[name = string("k_213_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_161_perm_0 = const()[name = string("v_161_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_213_cast_fp16 = transpose(perm = q_213_perm_0, x = q_211_cast_fp16)[name = string("transpose_7")]; tensor var_10532_cast_fp16 = mul(x = q_213_cast_fp16, y = cos_3_cast_fp16)[name = string("op_10532_cast_fp16")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_213_cast_fp16)[name = string("x1_105_cast_fp16")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_213_cast_fp16)[name = string("x2_105_cast_fp16")]; fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10553_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_10553_cast_fp16")]; int32 var_10555 = const()[name = string("op_10555"), val = int32(-1)]; bool var_10556_interleave_0 = const()[name = string("op_10556_interleave_0"), val = bool(false)]; tensor var_10556_cast_fp16 = concat(axis = var_10555, interleave = var_10556_interleave_0, values = (var_10553_cast_fp16, x1_105_cast_fp16))[name = string("op_10556_cast_fp16")]; tensor var_10557_cast_fp16 = mul(x = var_10556_cast_fp16, y = sin_3_cast_fp16)[name = string("op_10557_cast_fp16")]; tensor q_215_cast_fp16 = add(x = var_10532_cast_fp16, y = var_10557_cast_fp16)[name = string("q_215_cast_fp16")]; tensor k_213_cast_fp16 = transpose(perm = k_213_perm_0, x = k_211_cast_fp16)[name = string("transpose_6")]; tensor var_10560_cast_fp16 = mul(x = k_213_cast_fp16, y = cos_3_cast_fp16)[name = string("op_10560_cast_fp16")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_213_cast_fp16)[name = string("x1_107_cast_fp16")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_213_cast_fp16)[name = string("x2_107_cast_fp16")]; fp16 const_269_promoted_to_fp16 = const()[name = string("const_269_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10581_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_269_promoted_to_fp16)[name = string("op_10581_cast_fp16")]; int32 var_10583 = const()[name = string("op_10583"), val = int32(-1)]; bool var_10584_interleave_0 = const()[name = string("op_10584_interleave_0"), val = bool(false)]; tensor var_10584_cast_fp16 = concat(axis = var_10583, interleave = var_10584_interleave_0, values = (var_10581_cast_fp16, x1_107_cast_fp16))[name = string("op_10584_cast_fp16")]; tensor var_10585_cast_fp16 = mul(x = var_10584_cast_fp16, y = sin_3_cast_fp16)[name = string("op_10585_cast_fp16")]; tensor k_215_cast_fp16 = add(x = var_10560_cast_fp16, y = var_10585_cast_fp16)[name = string("k_215_cast_fp16")]; tensor var_10592 = const()[name = string("op_10592"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_53_cast_fp16 = reshape(shape = var_10592, x = k_215_cast_fp16)[name = string("nk_flat_53_cast_fp16")]; tensor var_10598 = const()[name = string("op_10598"), val = tensor([1, 1024, 1, 1])]; tensor v_161_cast_fp16 = transpose(perm = v_161_perm_0, x = v_159_cast_fp16)[name = string("transpose_5")]; tensor nv_flat_53_cast_fp16 = reshape(shape = var_10598, x = v_161_cast_fp16)[name = string("nv_flat_53_cast_fp16")]; tensor var_10607_cast_fp16 = mul(x = var_10401_cast_fp16, y = var_1194_cast_fp16)[name = string("op_10607_cast_fp16")]; tensor var_10608_cast_fp16 = mul(x = nk_flat_53_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_10608_cast_fp16")]; tensor key_cache_109_cast_fp16 = add(x = var_10607_cast_fp16, y = var_10608_cast_fp16)[name = string("key_cache_109_cast_fp16")]; tensor var_10614_cast_fp16 = mul(x = var_10421_cast_fp16, y = var_1194_cast_fp16)[name = string("op_10614_cast_fp16")]; tensor var_10615_cast_fp16 = mul(x = nv_flat_53_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_10615_cast_fp16")]; tensor value_cache_109_cast_fp16 = add(x = var_10614_cast_fp16, y = var_10615_cast_fp16)[name = string("value_cache_109_cast_fp16")]; tensor kc_157_axes_0 = const()[name = string("kc_157_axes_0"), val = tensor([2])]; tensor kc_157_cast_fp16 = squeeze(axes = kc_157_axes_0, x = key_cache_109_cast_fp16)[name = string("kc_157_cast_fp16")]; tensor var_10624 = const()[name = string("op_10624"), val = tensor([1, 8, 128, 256])]; tensor kc_159_cast_fp16 = reshape(shape = var_10624, x = kc_157_cast_fp16)[name = string("kc_159_cast_fp16")]; tensor vc_157_axes_0 = const()[name = string("vc_157_axes_0"), val = tensor([2])]; tensor vc_157_cast_fp16 = squeeze(axes = vc_157_axes_0, x = value_cache_109_cast_fp16)[name = string("vc_157_cast_fp16")]; tensor var_10632 = const()[name = string("op_10632"), val = tensor([1, 8, 128, 256])]; tensor vc_159_cast_fp16 = reshape(shape = var_10632, x = vc_157_cast_fp16)[name = string("vc_159_cast_fp16")]; tensor var_10635_axes_0 = const()[name = string("op_10635_axes_0"), val = tensor([2])]; tensor var_10635_cast_fp16 = expand_dims(axes = var_10635_axes_0, x = kc_159_cast_fp16)[name = string("op_10635_cast_fp16")]; tensor var_10643_reps_0 = const()[name = string("op_10643_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10643_cast_fp16 = tile(reps = var_10643_reps_0, x = var_10635_cast_fp16)[name = string("op_10643_cast_fp16")]; tensor var_10648 = const()[name = string("op_10648"), val = tensor([1, 16, 128, 256])]; tensor kc_161_cast_fp16 = reshape(shape = var_10648, x = var_10643_cast_fp16)[name = string("kc_161_cast_fp16")]; tensor var_10651_axes_0 = const()[name = string("op_10651_axes_0"), val = tensor([2])]; tensor var_10651_cast_fp16 = expand_dims(axes = var_10651_axes_0, x = vc_159_cast_fp16)[name = string("op_10651_cast_fp16")]; tensor var_10659_reps_0 = const()[name = string("op_10659_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10659_cast_fp16 = tile(reps = var_10659_reps_0, x = var_10651_cast_fp16)[name = string("op_10659_cast_fp16")]; tensor var_10664 = const()[name = string("op_10664"), val = tensor([1, 16, 128, 256])]; tensor vc_161_cast_fp16 = reshape(shape = var_10664, x = var_10659_cast_fp16)[name = string("vc_161_cast_fp16")]; bool var_10666_transpose_x_0 = const()[name = string("op_10666_transpose_x_0"), val = bool(false)]; bool var_10666_transpose_y_0 = const()[name = string("op_10666_transpose_y_0"), val = bool(false)]; tensor var_10666_cast_fp16 = matmul(transpose_x = var_10666_transpose_x_0, transpose_y = var_10666_transpose_y_0, x = q_215_cast_fp16, y = kc_161_cast_fp16)[name = string("op_10666_cast_fp16")]; fp16 _inversed_attn_weights_209_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_209_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_209_cast_fp16 = mul(x = var_10666_cast_fp16, y = _inversed_attn_weights_209_y_0_to_fp16)[name = string("_inversed_attn_weights_209_cast_fp16")]; tensor attn_weights_211_cast_fp16 = add(x = _inversed_attn_weights_209_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_211_cast_fp16")]; int32 var_10680 = const()[name = string("op_10680"), val = int32(-1)]; tensor attn_weights_215_cast_fp16 = softmax(axis = var_10680, x = attn_weights_211_cast_fp16)[name = string("attn_weights_215_cast_fp16")]; bool attn_output_105_transpose_x_1 = const()[name = string("attn_output_105_transpose_x_1"), val = bool(false)]; bool attn_output_105_transpose_y_1 = const()[name = string("attn_output_105_transpose_y_1"), val = bool(true)]; tensor attn_output_105_cast_fp16 = matmul(transpose_x = attn_output_105_transpose_x_1, transpose_y = attn_output_105_transpose_y_1, x = attn_weights_215_cast_fp16, y = vc_161_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_10689_perm_0 = const()[name = string("op_10689_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10693 = const()[name = string("op_10693"), val = tensor([1, 1, -1])]; tensor var_10689_cast_fp16 = transpose(perm = var_10689_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_4")]; tensor input_263_cast_fp16 = reshape(shape = var_10693, x = var_10689_cast_fp16)[name = string("input_263_cast_fp16")]; tensor layers_26_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413399296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415496512))))[name = string("layers_26_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_o_proj_weight_to_fp16_palettized, x = input_263_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor var_10699_axes_0 = const()[name = string("op_10699_axes_0"), val = tensor([0])]; tensor var_10699_cast_fp16 = squeeze(axes = var_10699_axes_0, x = linear_185_cast_fp16)[name = string("op_10699_cast_fp16")]; tensor var_10701_axes_0 = const()[name = string("op_10701_axes_0"), val = tensor([0])]; tensor var_10701_cast_fp16 = squeeze(axes = var_10701_axes_0, x = var_10699_cast_fp16)[name = string("op_10701_cast_fp16")]; tensor var_10703_axes_0 = const()[name = string("op_10703_axes_0"), val = tensor([-1])]; tensor var_10703_cast_fp16 = expand_dims(axes = var_10703_axes_0, x = var_10701_cast_fp16)[name = string("op_10703_cast_fp16")]; tensor attn_4d_53_axes_0 = const()[name = string("attn_4d_53_axes_0"), val = tensor([-1])]; tensor attn_4d_53_cast_fp16 = expand_dims(axes = attn_4d_53_axes_0, x = var_10703_cast_fp16)[name = string("attn_4d_53_cast_fp16")]; tensor hidden_105_cast_fp16 = add(x = hidden_103_cast_fp16, y = attn_4d_53_cast_fp16)[name = string("hidden_105_cast_fp16")]; tensor var_10709_axes_0 = const()[name = string("op_10709_axes_0"), val = tensor([-1])]; tensor var_10709_cast_fp16 = squeeze(axes = var_10709_axes_0, x = hidden_105_cast_fp16)[name = string("op_10709_cast_fp16")]; tensor var_10711_axes_0 = const()[name = string("op_10711_axes_0"), val = tensor([-1])]; tensor var_10711_cast_fp16 = squeeze(axes = var_10711_axes_0, x = var_10709_cast_fp16)[name = string("op_10711_cast_fp16")]; tensor hidden_states_643_axes_0 = const()[name = string("hidden_states_643_axes_0"), val = tensor([0])]; tensor hidden_states_643_cast_fp16 = expand_dims(axes = hidden_states_643_axes_0, x = var_10711_cast_fp16)[name = string("hidden_states_643_cast_fp16")]; fp16 var_10717_promoted_to_fp16 = const()[name = string("op_10717_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10723_cast_fp16 = pow(x = hidden_states_643_cast_fp16, y = var_10717_promoted_to_fp16)[name = string("op_10723_cast_fp16")]; tensor variance_215_axes_0 = const()[name = string("variance_215_axes_0"), val = tensor([-1])]; bool variance_215_keep_dims_0 = const()[name = string("variance_215_keep_dims_0"), val = bool(true)]; tensor variance_215_cast_fp16 = reduce_mean(axes = variance_215_axes_0, keep_dims = variance_215_keep_dims_0, x = var_10723_cast_fp16)[name = string("variance_215_cast_fp16")]; fp16 var_10726_to_fp16 = const()[name = string("op_10726_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10727_cast_fp16 = add(x = variance_215_cast_fp16, y = var_10726_to_fp16)[name = string("op_10727_cast_fp16")]; fp32 var_10728_epsilon_0 = const()[name = string("op_10728_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10728_cast_fp16 = rsqrt(epsilon = var_10728_epsilon_0, x = var_10727_cast_fp16)[name = string("op_10728_cast_fp16")]; tensor hidden_states_647_cast_fp16 = mul(x = hidden_states_643_cast_fp16, y = var_10728_cast_fp16)[name = string("hidden_states_647_cast_fp16")]; tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415497088)))]; tensor input_265_cast_fp16 = mul(x = const_270_to_fp16, y = hidden_states_647_cast_fp16)[name = string("input_265_cast_fp16")]; tensor layers_26_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415499200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418644992))))[name = string("layers_26_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_gate_proj_weight_to_fp16_palettized, x = input_265_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_10738_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_10738_cast_fp16")]; tensor layers_26_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418645568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421791360))))[name = string("layers_26_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_up_proj_weight_to_fp16_palettized, x = input_265_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor input_269_cast_fp16 = mul(x = var_10738_cast_fp16, y = linear_187_cast_fp16)[name = string("input_269_cast_fp16")]; tensor layers_26_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421791936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424937728))))[name = string("layers_26_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_mlp_down_proj_weight_to_fp16_palettized, x = input_269_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor var_10745_axes_0 = const()[name = string("op_10745_axes_0"), val = tensor([0])]; tensor var_10745_cast_fp16 = squeeze(axes = var_10745_axes_0, x = linear_188_cast_fp16)[name = string("op_10745_cast_fp16")]; tensor var_10747_axes_0 = const()[name = string("op_10747_axes_0"), val = tensor([0])]; tensor var_10747_cast_fp16 = squeeze(axes = var_10747_axes_0, x = var_10745_cast_fp16)[name = string("op_10747_cast_fp16")]; tensor var_10749_axes_0 = const()[name = string("op_10749_axes_0"), val = tensor([-1])]; tensor var_10749_cast_fp16 = expand_dims(axes = var_10749_axes_0, x = var_10747_cast_fp16)[name = string("op_10749_cast_fp16")]; tensor mlp_4d_53_axes_0 = const()[name = string("mlp_4d_53_axes_0"), val = tensor([-1])]; tensor mlp_4d_53_cast_fp16 = expand_dims(axes = mlp_4d_53_axes_0, x = var_10749_cast_fp16)[name = string("mlp_4d_53_cast_fp16")]; tensor hidden_107_cast_fp16 = add(x = hidden_105_cast_fp16, y = mlp_4d_53_cast_fp16)[name = string("hidden_107_cast_fp16")]; tensor var_10763_begin_0 = const()[name = string("op_10763_begin_0"), val = tensor([0, 27648, 0, 0])]; tensor var_10763_end_0 = const()[name = string("op_10763_end_0"), val = tensor([1, 1, 1, 256])]; tensor var_10763_end_mask_0 = const()[name = string("op_10763_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_10763_cast_fp16 = slice_by_index(begin = var_10763_begin_0, end = var_10763_end_0, end_mask = var_10763_end_mask_0, x = key_cache)[name = string("op_10763_cast_fp16")]; tensor var_10783_begin_0 = const()[name = string("op_10783_begin_0"), val = tensor([0, 27648, 0, 0])]; tensor var_10783_end_0 = const()[name = string("op_10783_end_0"), val = tensor([1, 1, 1, 256])]; tensor var_10783_end_mask_0 = const()[name = string("op_10783_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_10783_cast_fp16 = slice_by_index(begin = var_10783_begin_0, end = var_10783_end_0, end_mask = var_10783_end_mask_0, x = value_cache)[name = string("op_10783_cast_fp16")]; tensor var_10795_axes_0 = const()[name = string("op_10795_axes_0"), val = tensor([-1])]; tensor var_10795_cast_fp16 = squeeze(axes = var_10795_axes_0, x = hidden_107_cast_fp16)[name = string("op_10795_cast_fp16")]; tensor var_10797_axes_0 = const()[name = string("op_10797_axes_0"), val = tensor([-1])]; tensor var_10797_cast_fp16 = squeeze(axes = var_10797_axes_0, x = var_10795_cast_fp16)[name = string("op_10797_cast_fp16")]; tensor hidden_states_649_axes_0 = const()[name = string("hidden_states_649_axes_0"), val = tensor([0])]; tensor hidden_states_649_cast_fp16 = expand_dims(axes = hidden_states_649_axes_0, x = var_10797_cast_fp16)[name = string("hidden_states_649_cast_fp16")]; fp16 var_10803_promoted_to_fp16 = const()[name = string("op_10803_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10809_cast_fp16 = pow(x = hidden_states_649_cast_fp16, y = var_10803_promoted_to_fp16)[name = string("op_10809_cast_fp16")]; tensor variance_217_axes_0 = const()[name = string("variance_217_axes_0"), val = tensor([-1])]; bool variance_217_keep_dims_0 = const()[name = string("variance_217_keep_dims_0"), val = bool(true)]; tensor variance_217_cast_fp16 = reduce_mean(axes = variance_217_axes_0, keep_dims = variance_217_keep_dims_0, x = var_10809_cast_fp16)[name = string("variance_217_cast_fp16")]; fp16 var_10812_to_fp16 = const()[name = string("op_10812_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10813_cast_fp16 = add(x = variance_217_cast_fp16, y = var_10812_to_fp16)[name = string("op_10813_cast_fp16")]; fp32 var_10814_epsilon_0 = const()[name = string("op_10814_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10814_cast_fp16 = rsqrt(epsilon = var_10814_epsilon_0, x = var_10813_cast_fp16)[name = string("op_10814_cast_fp16")]; tensor hidden_states_653_cast_fp16 = mul(x = hidden_states_649_cast_fp16, y = var_10814_cast_fp16)[name = string("hidden_states_653_cast_fp16")]; tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424938304)))]; tensor input_271_cast_fp16 = mul(x = const_271_to_fp16, y = hidden_states_653_cast_fp16)[name = string("input_271_cast_fp16")]; tensor layers_27_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424940416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427037632))))[name = string("layers_27_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_27_self_attn_q_proj_weight_to_fp16_palettized, x = input_271_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor layers_27_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427038208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428086848))))[name = string("layers_27_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_k_proj_weight_to_fp16_palettized, x = input_271_cast_fp16)[name = string("linear_190_cast_fp16")]; tensor layers_27_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136064))))[name = string("layers_27_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_v_proj_weight_to_fp16_palettized, x = input_271_cast_fp16)[name = string("linear_191_cast_fp16")]; tensor var_10831 = const()[name = string("op_10831"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_655_cast_fp16 = reshape(shape = var_10831, x = linear_189_cast_fp16)[name = string("hidden_states_655_cast_fp16")]; tensor var_10837 = const()[name = string("op_10837"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_661_cast_fp16 = reshape(shape = var_10837, x = linear_190_cast_fp16)[name = string("hidden_states_661_cast_fp16")]; tensor var_10843 = const()[name = string("op_10843"), val = tensor([1, 1, 8, 128])]; tensor v_165_cast_fp16 = reshape(shape = var_10843, x = linear_191_cast_fp16)[name = string("v_165_cast_fp16")]; fp16 var_10848_promoted_to_fp16 = const()[name = string("op_10848_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10854_cast_fp16 = pow(x = hidden_states_655_cast_fp16, y = var_10848_promoted_to_fp16)[name = string("op_10854_cast_fp16")]; tensor variance_219_axes_0 = const()[name = string("variance_219_axes_0"), val = tensor([-1])]; bool variance_219_keep_dims_0 = const()[name = string("variance_219_keep_dims_0"), val = bool(true)]; tensor variance_219_cast_fp16 = reduce_mean(axes = variance_219_axes_0, keep_dims = variance_219_keep_dims_0, x = var_10854_cast_fp16)[name = string("variance_219_cast_fp16")]; fp16 var_10857_to_fp16 = const()[name = string("op_10857_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10858_cast_fp16 = add(x = variance_219_cast_fp16, y = var_10857_to_fp16)[name = string("op_10858_cast_fp16")]; fp32 var_10859_epsilon_0 = const()[name = string("op_10859_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10859_cast_fp16 = rsqrt(epsilon = var_10859_epsilon_0, x = var_10858_cast_fp16)[name = string("op_10859_cast_fp16")]; tensor hidden_states_659_cast_fp16 = mul(x = hidden_states_655_cast_fp16, y = var_10859_cast_fp16)[name = string("hidden_states_659_cast_fp16")]; tensor const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136640)))]; tensor q_219_cast_fp16 = mul(x = const_272_to_fp16, y = hidden_states_659_cast_fp16)[name = string("q_219_cast_fp16")]; fp16 var_10866_promoted_to_fp16 = const()[name = string("op_10866_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10872_cast_fp16 = pow(x = hidden_states_661_cast_fp16, y = var_10866_promoted_to_fp16)[name = string("op_10872_cast_fp16")]; tensor variance_221_axes_0 = const()[name = string("variance_221_axes_0"), val = tensor([-1])]; bool variance_221_keep_dims_0 = const()[name = string("variance_221_keep_dims_0"), val = bool(true)]; tensor variance_221_cast_fp16 = reduce_mean(axes = variance_221_axes_0, keep_dims = variance_221_keep_dims_0, x = var_10872_cast_fp16)[name = string("variance_221_cast_fp16")]; fp16 var_10875_to_fp16 = const()[name = string("op_10875_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10876_cast_fp16 = add(x = variance_221_cast_fp16, y = var_10875_to_fp16)[name = string("op_10876_cast_fp16")]; fp32 var_10877_epsilon_0 = const()[name = string("op_10877_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10877_cast_fp16 = rsqrt(epsilon = var_10877_epsilon_0, x = var_10876_cast_fp16)[name = string("op_10877_cast_fp16")]; tensor hidden_states_665_cast_fp16 = mul(x = hidden_states_661_cast_fp16, y = var_10877_cast_fp16)[name = string("hidden_states_665_cast_fp16")]; tensor const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136960)))]; tensor k_219_cast_fp16 = mul(x = const_273_to_fp16, y = hidden_states_665_cast_fp16)[name = string("k_219_cast_fp16")]; tensor q_221_perm_0 = const()[name = string("q_221_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_221_perm_0 = const()[name = string("k_221_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_perm_0 = const()[name = string("v_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_221_cast_fp16 = transpose(perm = q_221_perm_0, x = q_219_cast_fp16)[name = string("transpose_3")]; tensor var_10894_cast_fp16 = mul(x = q_221_cast_fp16, y = cos_3_cast_fp16)[name = string("op_10894_cast_fp16")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_221_cast_fp16)[name = string("x1_109_cast_fp16")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_221_cast_fp16)[name = string("x2_109_cast_fp16")]; fp16 const_276_promoted_to_fp16 = const()[name = string("const_276_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10915_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_276_promoted_to_fp16)[name = string("op_10915_cast_fp16")]; int32 var_10917 = const()[name = string("op_10917"), val = int32(-1)]; bool var_10918_interleave_0 = const()[name = string("op_10918_interleave_0"), val = bool(false)]; tensor var_10918_cast_fp16 = concat(axis = var_10917, interleave = var_10918_interleave_0, values = (var_10915_cast_fp16, x1_109_cast_fp16))[name = string("op_10918_cast_fp16")]; tensor var_10919_cast_fp16 = mul(x = var_10918_cast_fp16, y = sin_3_cast_fp16)[name = string("op_10919_cast_fp16")]; tensor q_cast_fp16 = add(x = var_10894_cast_fp16, y = var_10919_cast_fp16)[name = string("q_cast_fp16")]; tensor k_221_cast_fp16 = transpose(perm = k_221_perm_0, x = k_219_cast_fp16)[name = string("transpose_2")]; tensor var_10922_cast_fp16 = mul(x = k_221_cast_fp16, y = cos_3_cast_fp16)[name = string("op_10922_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_221_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_221_cast_fp16)[name = string("x2_cast_fp16")]; fp16 const_279_promoted_to_fp16 = const()[name = string("const_279_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10943_cast_fp16 = mul(x = x2_cast_fp16, y = const_279_promoted_to_fp16)[name = string("op_10943_cast_fp16")]; int32 var_10945 = const()[name = string("op_10945"), val = int32(-1)]; bool var_10946_interleave_0 = const()[name = string("op_10946_interleave_0"), val = bool(false)]; tensor var_10946_cast_fp16 = concat(axis = var_10945, interleave = var_10946_interleave_0, values = (var_10943_cast_fp16, x1_cast_fp16))[name = string("op_10946_cast_fp16")]; tensor var_10947_cast_fp16 = mul(x = var_10946_cast_fp16, y = sin_3_cast_fp16)[name = string("op_10947_cast_fp16")]; tensor k_cast_fp16 = add(x = var_10922_cast_fp16, y = var_10947_cast_fp16)[name = string("k_cast_fp16")]; tensor var_10954 = const()[name = string("op_10954"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_cast_fp16 = reshape(shape = var_10954, x = k_cast_fp16)[name = string("nk_flat_cast_fp16")]; tensor var_10960 = const()[name = string("op_10960"), val = tensor([1, 1024, 1, 1])]; tensor v_cast_fp16 = transpose(perm = v_perm_0, x = v_165_cast_fp16)[name = string("transpose_1")]; tensor nv_flat_cast_fp16 = reshape(shape = var_10960, x = v_cast_fp16)[name = string("nv_flat_cast_fp16")]; tensor var_10969_cast_fp16 = mul(x = var_10763_cast_fp16, y = var_1194_cast_fp16)[name = string("op_10969_cast_fp16")]; tensor var_10970_cast_fp16 = mul(x = nk_flat_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_10970_cast_fp16")]; tensor key_cache_cast_fp16 = add(x = var_10969_cast_fp16, y = var_10970_cast_fp16)[name = string("key_cache_cast_fp16")]; tensor var_10976_cast_fp16 = mul(x = var_10783_cast_fp16, y = var_1194_cast_fp16)[name = string("op_10976_cast_fp16")]; tensor var_10977_cast_fp16 = mul(x = nv_flat_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_10977_cast_fp16")]; tensor value_cache_cast_fp16 = add(x = var_10976_cast_fp16, y = var_10977_cast_fp16)[name = string("value_cache_cast_fp16")]; tensor kc_163_axes_0 = const()[name = string("kc_163_axes_0"), val = tensor([2])]; tensor kc_163_cast_fp16 = squeeze(axes = kc_163_axes_0, x = key_cache_cast_fp16)[name = string("kc_163_cast_fp16")]; tensor var_10986 = const()[name = string("op_10986"), val = tensor([1, 8, 128, 256])]; tensor kc_165_cast_fp16 = reshape(shape = var_10986, x = kc_163_cast_fp16)[name = string("kc_165_cast_fp16")]; tensor vc_163_axes_0 = const()[name = string("vc_163_axes_0"), val = tensor([2])]; tensor vc_163_cast_fp16 = squeeze(axes = vc_163_axes_0, x = value_cache_cast_fp16)[name = string("vc_163_cast_fp16")]; tensor var_10994 = const()[name = string("op_10994"), val = tensor([1, 8, 128, 256])]; tensor vc_165_cast_fp16 = reshape(shape = var_10994, x = vc_163_cast_fp16)[name = string("vc_165_cast_fp16")]; tensor var_10997_axes_0 = const()[name = string("op_10997_axes_0"), val = tensor([2])]; tensor var_10997_cast_fp16 = expand_dims(axes = var_10997_axes_0, x = kc_165_cast_fp16)[name = string("op_10997_cast_fp16")]; tensor var_11005_reps_0 = const()[name = string("op_11005_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_11005_cast_fp16 = tile(reps = var_11005_reps_0, x = var_10997_cast_fp16)[name = string("op_11005_cast_fp16")]; tensor var_11010 = const()[name = string("op_11010"), val = tensor([1, 16, 128, 256])]; tensor kc_cast_fp16 = reshape(shape = var_11010, x = var_11005_cast_fp16)[name = string("kc_cast_fp16")]; tensor var_11013_axes_0 = const()[name = string("op_11013_axes_0"), val = tensor([2])]; tensor var_11013_cast_fp16 = expand_dims(axes = var_11013_axes_0, x = vc_165_cast_fp16)[name = string("op_11013_cast_fp16")]; tensor var_11021_reps_0 = const()[name = string("op_11021_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_11021_cast_fp16 = tile(reps = var_11021_reps_0, x = var_11013_cast_fp16)[name = string("op_11021_cast_fp16")]; tensor var_11026 = const()[name = string("op_11026"), val = tensor([1, 16, 128, 256])]; tensor vc_cast_fp16 = reshape(shape = var_11026, x = var_11021_cast_fp16)[name = string("vc_cast_fp16")]; bool var_11028_transpose_x_0 = const()[name = string("op_11028_transpose_x_0"), val = bool(false)]; bool var_11028_transpose_y_0 = const()[name = string("op_11028_transpose_y_0"), val = bool(false)]; tensor var_11028_cast_fp16 = matmul(transpose_x = var_11028_transpose_x_0, transpose_y = var_11028_transpose_y_0, x = q_cast_fp16, y = kc_cast_fp16)[name = string("op_11028_cast_fp16")]; fp16 _inversed_attn_weights_217_y_0_to_fp16 = const()[name = string("_inversed_attn_weights_217_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_attn_weights_217_cast_fp16 = mul(x = var_11028_cast_fp16, y = _inversed_attn_weights_217_y_0_to_fp16)[name = string("_inversed_attn_weights_217_cast_fp16")]; tensor attn_weights_219_cast_fp16 = add(x = _inversed_attn_weights_217_cast_fp16, y = mask_1_cast_fp16)[name = string("attn_weights_219_cast_fp16")]; int32 var_11042 = const()[name = string("op_11042"), val = int32(-1)]; tensor attn_weights_cast_fp16 = softmax(axis = var_11042, x = attn_weights_219_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_109_transpose_x_1 = const()[name = string("attn_output_109_transpose_x_1"), val = bool(false)]; bool attn_output_109_transpose_y_1 = const()[name = string("attn_output_109_transpose_y_1"), val = bool(true)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_1, transpose_y = attn_output_109_transpose_y_1, x = attn_weights_cast_fp16, y = vc_cast_fp16)[name = string("attn_output_109_cast_fp16")]; tensor var_11051_perm_0 = const()[name = string("op_11051_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_11055 = const()[name = string("op_11055"), val = tensor([1, 1, -1])]; tensor var_11051_cast_fp16 = transpose(perm = var_11051_perm_0, x = attn_output_109_cast_fp16)[name = string("transpose_0")]; tensor input_273_cast_fp16 = reshape(shape = var_11055, x = var_11051_cast_fp16)[name = string("input_273_cast_fp16")]; tensor layers_27_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429137280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431234496))))[name = string("layers_27_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_192_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_o_proj_weight_to_fp16_palettized, x = input_273_cast_fp16)[name = string("linear_192_cast_fp16")]; tensor var_11061_axes_0 = const()[name = string("op_11061_axes_0"), val = tensor([0])]; tensor var_11061_cast_fp16 = squeeze(axes = var_11061_axes_0, x = linear_192_cast_fp16)[name = string("op_11061_cast_fp16")]; tensor var_11063_axes_0 = const()[name = string("op_11063_axes_0"), val = tensor([0])]; tensor var_11063_cast_fp16 = squeeze(axes = var_11063_axes_0, x = var_11061_cast_fp16)[name = string("op_11063_cast_fp16")]; tensor var_11065_axes_0 = const()[name = string("op_11065_axes_0"), val = tensor([-1])]; tensor var_11065_cast_fp16 = expand_dims(axes = var_11065_axes_0, x = var_11063_cast_fp16)[name = string("op_11065_cast_fp16")]; tensor attn_4d_axes_0 = const()[name = string("attn_4d_axes_0"), val = tensor([-1])]; tensor attn_4d_cast_fp16 = expand_dims(axes = attn_4d_axes_0, x = var_11065_cast_fp16)[name = string("attn_4d_cast_fp16")]; tensor hidden_109_cast_fp16 = add(x = hidden_107_cast_fp16, y = attn_4d_cast_fp16)[name = string("hidden_109_cast_fp16")]; tensor var_11071_axes_0 = const()[name = string("op_11071_axes_0"), val = tensor([-1])]; tensor var_11071_cast_fp16 = squeeze(axes = var_11071_axes_0, x = hidden_109_cast_fp16)[name = string("op_11071_cast_fp16")]; tensor var_11073_axes_0 = const()[name = string("op_11073_axes_0"), val = tensor([-1])]; tensor var_11073_cast_fp16 = squeeze(axes = var_11073_axes_0, x = var_11071_cast_fp16)[name = string("op_11073_cast_fp16")]; tensor hidden_states_667_axes_0 = const()[name = string("hidden_states_667_axes_0"), val = tensor([0])]; tensor hidden_states_667_cast_fp16 = expand_dims(axes = hidden_states_667_axes_0, x = var_11073_cast_fp16)[name = string("hidden_states_667_cast_fp16")]; fp16 var_11079_promoted_to_fp16 = const()[name = string("op_11079_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11085_cast_fp16 = pow(x = hidden_states_667_cast_fp16, y = var_11079_promoted_to_fp16)[name = string("op_11085_cast_fp16")]; tensor variance_223_axes_0 = const()[name = string("variance_223_axes_0"), val = tensor([-1])]; bool variance_223_keep_dims_0 = const()[name = string("variance_223_keep_dims_0"), val = bool(true)]; tensor variance_223_cast_fp16 = reduce_mean(axes = variance_223_axes_0, keep_dims = variance_223_keep_dims_0, x = var_11085_cast_fp16)[name = string("variance_223_cast_fp16")]; fp16 var_11088_to_fp16 = const()[name = string("op_11088_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11089_cast_fp16 = add(x = variance_223_cast_fp16, y = var_11088_to_fp16)[name = string("op_11089_cast_fp16")]; fp32 var_11090_epsilon_0 = const()[name = string("op_11090_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11090_cast_fp16 = rsqrt(epsilon = var_11090_epsilon_0, x = var_11089_cast_fp16)[name = string("op_11090_cast_fp16")]; tensor hidden_states_671_cast_fp16 = mul(x = hidden_states_667_cast_fp16, y = var_11090_cast_fp16)[name = string("hidden_states_671_cast_fp16")]; tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431235072)))]; tensor input_275_cast_fp16 = mul(x = const_280_to_fp16, y = hidden_states_671_cast_fp16)[name = string("input_275_cast_fp16")]; tensor layers_27_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431237184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434382976))))[name = string("layers_27_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_gate_proj_weight_to_fp16_palettized, x = input_275_cast_fp16)[name = string("linear_193_cast_fp16")]; tensor var_11100_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_11100_cast_fp16")]; tensor layers_27_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434383552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437529344))))[name = string("layers_27_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_up_proj_weight_to_fp16_palettized, x = input_275_cast_fp16)[name = string("linear_194_cast_fp16")]; tensor input_279_cast_fp16 = mul(x = var_11100_cast_fp16, y = linear_194_cast_fp16)[name = string("input_279_cast_fp16")]; tensor layers_27_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437529920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440675712))))[name = string("layers_27_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_195_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_mlp_down_proj_weight_to_fp16_palettized, x = input_279_cast_fp16)[name = string("linear_195_cast_fp16")]; tensor var_11107_axes_0 = const()[name = string("op_11107_axes_0"), val = tensor([0])]; tensor var_11107_cast_fp16 = squeeze(axes = var_11107_axes_0, x = linear_195_cast_fp16)[name = string("op_11107_cast_fp16")]; tensor var_11109_axes_0 = const()[name = string("op_11109_axes_0"), val = tensor([0])]; tensor var_11109_cast_fp16 = squeeze(axes = var_11109_axes_0, x = var_11107_cast_fp16)[name = string("op_11109_cast_fp16")]; tensor var_11111_axes_0 = const()[name = string("op_11111_axes_0"), val = tensor([-1])]; tensor var_11111_cast_fp16 = expand_dims(axes = var_11111_axes_0, x = var_11109_cast_fp16)[name = string("op_11111_cast_fp16")]; tensor mlp_4d_axes_0 = const()[name = string("mlp_4d_axes_0"), val = tensor([-1])]; tensor mlp_4d_cast_fp16 = expand_dims(axes = mlp_4d_axes_0, x = var_11111_cast_fp16)[name = string("mlp_4d_cast_fp16")]; tensor hidden_cast_fp16 = add(x = hidden_109_cast_fp16, y = mlp_4d_cast_fp16)[name = string("hidden_cast_fp16")]; tensor var_11117_axes_0 = const()[name = string("op_11117_axes_0"), val = tensor([-1])]; tensor var_11117_cast_fp16 = squeeze(axes = var_11117_axes_0, x = hidden_cast_fp16)[name = string("op_11117_cast_fp16")]; tensor var_11119_axes_0 = const()[name = string("op_11119_axes_0"), val = tensor([-1])]; tensor var_11119_cast_fp16 = squeeze(axes = var_11119_axes_0, x = var_11117_cast_fp16)[name = string("op_11119_cast_fp16")]; tensor hidden_states_673_axes_0 = const()[name = string("hidden_states_673_axes_0"), val = tensor([0])]; tensor hidden_states_673_cast_fp16 = expand_dims(axes = hidden_states_673_axes_0, x = var_11119_cast_fp16)[name = string("hidden_states_673_cast_fp16")]; fp16 var_11125_promoted_to_fp16 = const()[name = string("op_11125_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11131_cast_fp16 = pow(x = hidden_states_673_cast_fp16, y = var_11125_promoted_to_fp16)[name = string("op_11131_cast_fp16")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_11131_cast_fp16)[name = string("variance_cast_fp16")]; fp16 var_11134_to_fp16 = const()[name = string("op_11134_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11135_cast_fp16 = add(x = variance_cast_fp16, y = var_11134_to_fp16)[name = string("op_11135_cast_fp16")]; fp32 var_11136_epsilon_0 = const()[name = string("op_11136_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11136_cast_fp16 = rsqrt(epsilon = var_11136_epsilon_0, x = var_11135_cast_fp16)[name = string("op_11136_cast_fp16")]; tensor hidden_states_cast_fp16 = mul(x = hidden_states_673_cast_fp16, y = var_11136_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440676288)))]; tensor input_cast_fp16 = mul(x = const_281_to_fp16, y = hidden_states_cast_fp16)[name = string("input_cast_fp16")]; tensor codec_head_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440678400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443824192))))[name = string("codec_head_weight_to_fp16_palettized")]; tensor logits = linear(bias = linear_4_bias_0_to_fp16, weight = codec_head_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("linear_196_cast_fp16")]; int32 var_11144 = const()[name = string("op_11144"), val = int32(1)]; bool new_kv_1_interleave_0 = const()[name = string("new_kv_1_interleave_0"), val = bool(false)]; tensor new_kv_1_cast_fp16 = concat(axis = var_11144, interleave = new_kv_1_interleave_0, values = (nk_flat_1_cast_fp16, nk_flat_3_cast_fp16, nk_flat_5_cast_fp16, nk_flat_7_cast_fp16, nk_flat_9_cast_fp16, nk_flat_11_cast_fp16, nk_flat_13_cast_fp16, nk_flat_15_cast_fp16, nk_flat_17_cast_fp16, nk_flat_19_cast_fp16, nk_flat_21_cast_fp16, nk_flat_23_cast_fp16, nk_flat_25_cast_fp16, nk_flat_27_cast_fp16, nk_flat_29_cast_fp16, nk_flat_31_cast_fp16, nk_flat_33_cast_fp16, nk_flat_35_cast_fp16, nk_flat_37_cast_fp16, nk_flat_39_cast_fp16, nk_flat_41_cast_fp16, nk_flat_43_cast_fp16, nk_flat_45_cast_fp16, nk_flat_47_cast_fp16, nk_flat_49_cast_fp16, nk_flat_51_cast_fp16, nk_flat_53_cast_fp16, nk_flat_cast_fp16))[name = string("new_kv_1_cast_fp16")]; tensor var_11153_cast_fp16 = mul(x = key_cache, y = var_1194_cast_fp16)[name = string("op_11153_cast_fp16")]; tensor var_11154_cast_fp16 = mul(x = new_kv_1_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_11154_cast_fp16")]; tensor new_key_cache = add(x = var_11153_cast_fp16, y = var_11154_cast_fp16)[name = string("op_11156_cast_fp16")]; int32 var_11158 = const()[name = string("op_11158"), val = int32(1)]; bool new_kv_interleave_0 = const()[name = string("new_kv_interleave_0"), val = bool(false)]; tensor new_kv_cast_fp16 = concat(axis = var_11158, interleave = new_kv_interleave_0, values = (nv_flat_1_cast_fp16, nv_flat_3_cast_fp16, nv_flat_5_cast_fp16, nv_flat_7_cast_fp16, nv_flat_9_cast_fp16, nv_flat_11_cast_fp16, nv_flat_13_cast_fp16, nv_flat_15_cast_fp16, nv_flat_17_cast_fp16, nv_flat_19_cast_fp16, nv_flat_21_cast_fp16, nv_flat_23_cast_fp16, nv_flat_25_cast_fp16, nv_flat_27_cast_fp16, nv_flat_29_cast_fp16, nv_flat_31_cast_fp16, nv_flat_33_cast_fp16, nv_flat_35_cast_fp16, nv_flat_37_cast_fp16, nv_flat_39_cast_fp16, nv_flat_41_cast_fp16, nv_flat_43_cast_fp16, nv_flat_45_cast_fp16, nv_flat_47_cast_fp16, nv_flat_49_cast_fp16, nv_flat_51_cast_fp16, nv_flat_53_cast_fp16, nv_flat_cast_fp16))[name = string("new_kv_cast_fp16")]; tensor var_11167_cast_fp16 = mul(x = value_cache, y = var_1194_cast_fp16)[name = string("op_11167_cast_fp16")]; tensor var_11168_cast_fp16 = mul(x = new_kv_cast_fp16, y = update_mask_1_cast_fp16)[name = string("op_11168_cast_fp16")]; tensor new_value_cache = add(x = var_11167_cast_fp16, y = var_11168_cast_fp16)[name = string("op_11170_cast_fp16")]; tensor var_11172_axes_0 = const()[name = string("op_11172_axes_0"), val = tensor([0])]; tensor var_11172_cast_fp16 = squeeze(axes = var_11172_axes_0, x = input_cast_fp16)[name = string("op_11172_cast_fp16")]; tensor var_11174_axes_0 = const()[name = string("op_11174_axes_0"), val = tensor([-1])]; tensor var_11174_cast_fp16 = expand_dims(axes = var_11174_axes_0, x = var_11172_cast_fp16)[name = string("op_11174_cast_fp16")]; tensor var_11176_axes_0 = const()[name = string("op_11176_axes_0"), val = tensor([-1])]; tensor hidden_states = expand_dims(axes = var_11176_axes_0, x = var_11174_cast_fp16)[name = string("op_11176_cast_fp16")]; } -> (logits, hidden_states, new_key_cache, new_value_cache); }