program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor attention_mask, tensor input_ids) [FlexibleShapeInformation = tuple>>, tuple>>>>((("DefaultShapes", {{"attention_mask", [1, 64]}, {"input_ids", [1, 64]}}), ("EnumeratedShapes", {{"090f2853", {{"attention_mask", [1, 128]}, {"input_ids", [1, 128]}}}, {"0a731900", {{"attention_mask", [1, 256]}, {"input_ids", [1, 256]}}}, {"6a71677c", {{"attention_mask", [1, 1024]}, {"input_ids", [1, 1024]}}}, {"7b263bfe", {{"attention_mask", [1, 512]}, {"input_ids", [1, 512]}}}, {"8ecaa44d", {{"attention_mask", [1, 2048]}, {"input_ids", [1, 2048]}}}, {"d8f542e5", {{"attention_mask", [1, 64]}, {"input_ids", [1, 64]}}}})))] { int32 hidden_states_1_batch_dims_0 = const()[name = string("hidden_states_1_batch_dims_0"), val = int32(0)]; bool hidden_states_1_validate_indices_0 = const()[name = string("hidden_states_1_validate_indices_0"), val = bool(false)]; tensor embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(151669)]; tensor add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_56 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_56")]; int32 hidden_states_1_cast_fp16_axis_0 = const()[name = string("hidden_states_1_cast_fp16_axis_0"), val = int32(0)]; tensor hidden_states_1_cast_fp16 = gather(axis = hidden_states_1_cast_fp16_axis_0, batch_dims = hidden_states_1_batch_dims_0, indices = select_56, validate_indices = hidden_states_1_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("hidden_states_1_cast_fp16")]; tensor var_72_shape = shape(x = input_ids)[name = string("op_72_shape")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_72_shape_to_uint16_dtype_0 = const()[name = string("op_72_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_0_indices_0_to_uint16 = const()[name = string("gather_0_indices_0_to_uint16"), val = uint16(1)]; tensor var_72_shape_to_uint16 = cast(dtype = var_72_shape_to_uint16_dtype_0, x = var_72_shape)[name = string("cast_397")]; uint16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = gather_0_indices_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_72_shape_to_uint16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 const_0 = const()[name = string("const_0"), val = int32(0)]; int32 const_1 = const()[name = string("const_1"), val = int32(1)]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_396")]; tensor var_81 = range_1d(end = gather_0_cast_uint16_to_int32, start = const_0, step = const_1)[name = string("op_81")]; tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; tensor position_ids = expand_dims(axes = position_ids_axes_0, x = var_81)[name = string("position_ids")]; int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (gather_0_cast_uint16_to_int32, gather_0_cast_uint16_to_int32))[name = string("concat_0")]; fp16 fill_0_value_0_to_fp16 = const()[name = string("fill_0_value_0_to_fp16"), val = fp16(0x1p+0)]; tensor fill_0_cast_fp16 = fill(shape = concat_0, value = fill_0_value_0_to_fp16)[name = string("fill_0_cast_fp16")]; int32 band_part_0_lower_0 = const()[name = string("band_part_0_lower_0"), val = int32(-1)]; int32 band_part_0_upper_0 = const()[name = string("band_part_0_upper_0"), val = int32(0)]; tensor band_part_0_cast_fp16 = band_part(lower = band_part_0_lower_0, upper = band_part_0_upper_0, x = fill_0_cast_fp16)[name = string("band_part_0_cast_fp16")]; fp16 var_92_to_fp16 = const()[name = string("op_92_to_fp16"), val = fp16(0x1p+0)]; tensor var_94_cast_fp16 = sub(x = var_92_to_fp16, y = band_part_0_cast_fp16)[name = string("op_94_cast_fp16")]; fp16 var_95_to_fp16 = const()[name = string("op_95_to_fp16"), val = fp16(-0x1.ffcp+15)]; tensor causal_mask_1_cast_fp16 = mul(x = var_94_cast_fp16, y = var_95_to_fp16)[name = string("causal_mask_1_cast_fp16")]; tensor var_98_axes_0 = const()[name = string("op_98_axes_0"), val = tensor([0])]; tensor var_98_cast_fp16 = expand_dims(axes = var_98_axes_0, x = causal_mask_1_cast_fp16)[name = string("op_98_cast_fp16")]; tensor causal_mask_axes_0 = const()[name = string("causal_mask_axes_0"), val = tensor([0])]; tensor causal_mask_cast_fp16 = expand_dims(axes = causal_mask_axes_0, x = var_98_cast_fp16)[name = string("causal_mask_cast_fp16")]; fp16 var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = fp16(0x1p+0)]; string var_105_to_fp16_dtype_0 = const()[name = string("op_105_to_fp16_dtype_0"), val = string("fp16")]; tensor attention_mask_to_fp16 = cast(dtype = var_105_to_fp16_dtype_0, x = attention_mask)[name = string("cast_395")]; tensor var_108_cast_fp16 = sub(x = var_106_to_fp16, y = attention_mask_to_fp16)[name = string("op_108_cast_fp16")]; fp16 var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = fp16(-0x1.ffcp+15)]; tensor padding_mask_1_cast_fp16 = mul(x = var_108_cast_fp16, y = var_109_to_fp16)[name = string("padding_mask_1_cast_fp16")]; tensor var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor([1])]; tensor var_112_cast_fp16 = expand_dims(axes = var_112_axes_0, x = padding_mask_1_cast_fp16)[name = string("op_112_cast_fp16")]; tensor padding_mask_axes_0 = const()[name = string("padding_mask_axes_0"), val = tensor([2])]; tensor padding_mask_cast_fp16 = expand_dims(axes = padding_mask_axes_0, x = var_112_cast_fp16)[name = string("padding_mask_cast_fp16")]; tensor attention_mask_cast_fp16 = add(x = causal_mask_cast_fp16, y = padding_mask_cast_fp16)[name = string("attention_mask_cast_fp16")]; int32 var_118 = const()[name = string("op_118"), val = int32(-1)]; tensor var_136_axes_0 = const()[name = string("op_136_axes_0"), val = tensor([1])]; tensor var_136 = expand_dims(axes = var_136_axes_0, x = position_ids)[name = string("op_136")]; bool var_141_transpose_x_0 = const()[name = string("op_141_transpose_x_0"), val = bool(false)]; bool var_141_transpose_y_0 = const()[name = string("op_141_transpose_y_0"), val = bool(false)]; tensor const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310618240)))]; string position_ids_expanded_1_to_fp16_dtype_0 = const()[name = string("position_ids_expanded_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_136_to_fp16 = cast(dtype = position_ids_expanded_1_to_fp16_dtype_0, x = var_136)[name = string("cast_394")]; tensor var_141_cast_fp16 = matmul(transpose_x = var_141_transpose_x_0, transpose_y = var_141_transpose_y_0, x = const_4_to_fp16, y = var_136_to_fp16)[name = string("op_141_cast_fp16")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_141_cast_fp16)[name = string("transpose_112")]; tensor emb_cast_fp16 = concat(axis = var_118, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; tensor var_145_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_145_cast_fp16")]; tensor var_148_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_148_cast_fp16")]; int32 var_166 = const()[name = string("op_166"), val = int32(2)]; int32 var_167 = const()[name = string("op_167"), val = int32(-1)]; fp16 var_166_promoted_to_fp16 = const()[name = string("op_166_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_176_cast_fp16 = pow(x = hidden_states_1_cast_fp16, y = var_166_promoted_to_fp16)[name = string("op_176_cast_fp16")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_176_cast_fp16)[name = string("variance_1_cast_fp16")]; fp16 var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_180_cast_fp16 = add(x = variance_1_cast_fp16, y = var_179_to_fp16)[name = string("op_180_cast_fp16")]; fp32 var_181_epsilon_0 = const()[name = string("op_181_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_181_cast_fp16 = rsqrt(epsilon = var_181_epsilon_0, x = var_180_cast_fp16)[name = string("op_181_cast_fp16")]; tensor hidden_states_5_cast_fp16 = mul(x = hidden_states_1_cast_fp16, y = var_181_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310618432)))]; tensor hidden_states_9_cast_fp16 = mul(x = layers_0_input_layernorm_weight_to_fp16, y = hidden_states_5_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor var_194_shape_cast_fp16 = shape(x = hidden_states_9_cast_fp16)[name = string("op_194_shape_cast_fp16")]; int32 gather_2 = const()[name = string("gather_2"), val = int32(1)]; int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; string var_194_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_194_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_3_indices_0_to_uint16 = const()[name = string("gather_3_indices_0_to_uint16"), val = uint16(1)]; tensor var_194_shape_cast_fp16_to_uint16 = cast(dtype = var_194_shape_cast_fp16_to_uint16_dtype_0, x = var_194_shape_cast_fp16)[name = string("cast_393")]; uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = gather_3_indices_0_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_194_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310620544)))]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314814912)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_11_cast_fp16 = reshape(shape = concat_2x, x = linear_0_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; fp16 var_166_promoted_1_to_fp16 = const()[name = string("op_166_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_202_cast_fp16 = pow(x = hidden_states_11_cast_fp16, y = var_166_promoted_1_to_fp16)[name = string("op_202_cast_fp16")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_202_cast_fp16)[name = string("variance_3_cast_fp16")]; fp16 var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_206_cast_fp16 = add(x = variance_3_cast_fp16, y = var_205_to_fp16)[name = string("op_206_cast_fp16")]; fp32 var_207_epsilon_0 = const()[name = string("op_207_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_207_cast_fp16 = rsqrt(epsilon = var_207_epsilon_0, x = var_206_cast_fp16)[name = string("op_207_cast_fp16")]; tensor hidden_states_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = var_207_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314819072)))]; tensor var_210_cast_fp16 = mul(x = layers_0_self_attn_q_norm_weight_to_fp16, y = hidden_states_15_cast_fp16)[name = string("op_210_cast_fp16")]; tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314819392)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316916608)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor concat_3x = const()[name = string("concat_3x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_19_cast_fp16 = reshape(shape = concat_3x, x = linear_1_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; fp16 var_166_promoted_2_to_fp16 = const()[name = string("op_166_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_218_cast_fp16 = pow(x = hidden_states_19_cast_fp16, y = var_166_promoted_2_to_fp16)[name = string("op_218_cast_fp16")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_218_cast_fp16)[name = string("variance_5_cast_fp16")]; fp16 var_221_to_fp16 = const()[name = string("op_221_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_222_cast_fp16 = add(x = variance_5_cast_fp16, y = var_221_to_fp16)[name = string("op_222_cast_fp16")]; fp32 var_223_epsilon_0 = const()[name = string("op_223_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_223_cast_fp16 = rsqrt(epsilon = var_223_epsilon_0, x = var_222_cast_fp16)[name = string("op_223_cast_fp16")]; tensor hidden_states_23_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = var_223_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316918720)))]; tensor var_226_cast_fp16 = mul(x = layers_0_self_attn_k_norm_weight_to_fp16, y = hidden_states_23_cast_fp16)[name = string("op_226_cast_fp16")]; tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316919040)))]; tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor concat_4x = const()[name = string("concat_4x"), val = tensor([1, -1, 8, 128])]; tensor var_231_cast_fp16 = reshape(shape = concat_4x, x = linear_2_cast_fp16)[name = string("op_231_cast_fp16")]; tensor hidden_states_31_perm_0 = const()[name = string("hidden_states_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_5_axes_0 = const()[name = string("cos_5_axes_0"), val = tensor([1])]; tensor cos_5_cast_fp16 = expand_dims(axes = cos_5_axes_0, x = var_145_cast_fp16)[name = string("cos_5_cast_fp16")]; tensor sin_5_axes_0 = const()[name = string("sin_5_axes_0"), val = tensor([1])]; tensor sin_5_cast_fp16 = expand_dims(axes = sin_5_axes_0, x = var_148_cast_fp16)[name = string("sin_5_cast_fp16")]; tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_210_cast_fp16)[name = string("transpose_111")]; tensor var_235_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_5_cast_fp16)[name = string("op_235_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_246_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_246_cast_fp16")]; bool var_248_interleave_0 = const()[name = string("op_248_interleave_0"), val = bool(false)]; tensor var_248_cast_fp16 = concat(axis = var_167, interleave = var_248_interleave_0, values = (var_246_cast_fp16, x1_1_cast_fp16))[name = string("op_248_cast_fp16")]; tensor var_249_cast_fp16 = mul(x = var_248_cast_fp16, y = sin_5_cast_fp16)[name = string("op_249_cast_fp16")]; tensor query_1_cast_fp16 = add(x = var_235_cast_fp16, y = var_249_cast_fp16)[name = string("query_1_cast_fp16")]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_226_cast_fp16)[name = string("transpose_110")]; tensor var_251_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_5_cast_fp16)[name = string("op_251_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_262_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_262_cast_fp16")]; bool var_264_interleave_0 = const()[name = string("op_264_interleave_0"), val = bool(false)]; tensor var_264_cast_fp16 = concat(axis = var_167, interleave = var_264_interleave_0, values = (var_262_cast_fp16, x1_3_cast_fp16))[name = string("op_264_cast_fp16")]; tensor var_265_cast_fp16 = mul(x = var_264_cast_fp16, y = sin_5_cast_fp16)[name = string("op_265_cast_fp16")]; tensor hidden_states_27_cast_fp16 = add(x = var_251_cast_fp16, y = var_265_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor var_267_shape_cast_fp16 = shape(x = hidden_states_27_cast_fp16)[name = string("op_267_shape_cast_fp16")]; int32 gather_8 = const()[name = string("gather_8"), val = int32(1)]; int32 gather_9 = const()[name = string("gather_9"), val = int32(8)]; int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; string var_267_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_267_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_10_indices_0_to_uint16 = const()[name = string("gather_10_indices_0_to_uint16"), val = uint16(2)]; tensor var_267_shape_cast_fp16_to_uint16 = cast(dtype = var_267_shape_cast_fp16_to_uint16_dtype_0, x = var_267_shape_cast_fp16)[name = string("cast_391")]; uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = gather_10_indices_0_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_267_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")]; string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_11 = const()[name = string("gather_11"), val = int32(128)]; tensor var_274_axes_0 = const()[name = string("op_274_axes_0"), val = tensor([2])]; tensor var_274_cast_fp16 = expand_dims(axes = var_274_axes_0, x = hidden_states_27_cast_fp16)[name = string("op_274_cast_fp16")]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_390")]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (gather_8, gather_9, var_166, gather_10_cast_uint16_to_int32, gather_11))[name = string("concat_5")]; tensor shape_0_cast_fp16 = shape(x = var_274_cast_fp16)[name = string("shape_0_cast_fp16")]; int32 equal_0_y_0 = const()[name = string("equal_0_y_0"), val = int32(-1)]; tensor equal_0 = equal(x = concat_5, y = equal_0_y_0)[name = string("equal_0")]; tensor select_0 = select(a = shape_0_cast_fp16, b = concat_5, cond = equal_0)[name = string("select_0")]; tensor real_div_0 = real_div(x = select_0, y = shape_0_cast_fp16)[name = string("real_div_0")]; tensor hidden_states_29_cast_fp16 = tile(reps = real_div_0, x = var_274_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor concat_6x = const()[name = string("concat_6x"), val = tensor([1, 16, -1, 128])]; tensor key_states_1_cast_fp16 = reshape(shape = concat_6x, x = hidden_states_29_cast_fp16)[name = string("key_states_1_cast_fp16")]; tensor hidden_states_31_cast_fp16 = transpose(perm = hidden_states_31_perm_0, x = var_231_cast_fp16)[name = string("transpose_109")]; tensor var_284_shape_cast_fp16 = shape(x = hidden_states_31_cast_fp16)[name = string("op_284_shape_cast_fp16")]; int32 gather_12 = const()[name = string("gather_12"), val = int32(1)]; int32 gather_13 = const()[name = string("gather_13"), val = int32(8)]; int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; string var_284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_14_indices_0_to_uint16 = const()[name = string("gather_14_indices_0_to_uint16"), val = uint16(2)]; tensor var_284_shape_cast_fp16_to_uint16 = cast(dtype = var_284_shape_cast_fp16_to_uint16_dtype_0, x = var_284_shape_cast_fp16)[name = string("cast_389")]; uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = gather_14_indices_0_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_284_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_15 = const()[name = string("gather_15"), val = int32(128)]; tensor var_291_axes_0 = const()[name = string("op_291_axes_0"), val = tensor([2])]; tensor var_291_cast_fp16 = expand_dims(axes = var_291_axes_0, x = hidden_states_31_cast_fp16)[name = string("op_291_cast_fp16")]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_388")]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (gather_12, gather_13, var_166, gather_14_cast_uint16_to_int32, gather_15))[name = string("concat_7")]; tensor shape_1_cast_fp16 = shape(x = var_291_cast_fp16)[name = string("shape_1_cast_fp16")]; int32 equal_1_y_0 = const()[name = string("equal_1_y_0"), val = int32(-1)]; tensor equal_1 = equal(x = concat_7, y = equal_1_y_0)[name = string("equal_1")]; tensor select_1 = select(a = shape_1_cast_fp16, b = concat_7, cond = equal_1)[name = string("select_1")]; tensor real_div_1 = real_div(x = select_1, y = shape_1_cast_fp16)[name = string("real_div_1")]; tensor hidden_states_33_cast_fp16 = tile(reps = real_div_1, x = var_291_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor concat_8x = const()[name = string("concat_8x"), val = tensor([1, 16, -1, 128])]; tensor value_states_1_cast_fp16 = reshape(shape = concat_8x, x = hidden_states_33_cast_fp16)[name = string("value_states_1_cast_fp16")]; bool var_302_transpose_x_1 = const()[name = string("op_302_transpose_x_1"), val = bool(false)]; bool var_302_transpose_y_1 = const()[name = string("op_302_transpose_y_1"), val = bool(true)]; tensor var_302_cast_fp16 = matmul(transpose_x = var_302_transpose_x_1, transpose_y = var_302_transpose_y_1, x = query_1_cast_fp16, y = key_states_1_cast_fp16)[name = string("op_302_cast_fp16")]; fp16 var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_302_cast_fp16, y = var_303_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor input_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_3_cast_fp16")]; tensor var_306_cast_fp16 = softmax(axis = var_167, x = input_3_cast_fp16)[name = string("op_306_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_306_cast_fp16, y = value_states_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_310_perm_0 = const()[name = string("op_310_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_392")]; tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (gather_2, gather_3_cast_uint16_to_int32, var_167))[name = string("concat_9")]; tensor var_310_cast_fp16 = transpose(perm = var_310_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_108")]; tensor var_313_cast_fp16 = reshape(shape = concat_9, x = var_310_cast_fp16)[name = string("op_313_cast_fp16")]; tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319016256)))]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = var_313_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_1_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; fp16 var_166_promoted_3_to_fp16 = const()[name = string("op_166_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_320_cast_fp16 = pow(x = hidden_states_37_cast_fp16, y = var_166_promoted_3_to_fp16)[name = string("op_320_cast_fp16")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_320_cast_fp16)[name = string("variance_7_cast_fp16")]; fp16 var_323_to_fp16 = const()[name = string("op_323_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_324_cast_fp16 = add(x = variance_7_cast_fp16, y = var_323_to_fp16)[name = string("op_324_cast_fp16")]; fp32 var_325_epsilon_0 = const()[name = string("op_325_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_325_cast_fp16 = rsqrt(epsilon = var_325_epsilon_0, x = var_324_cast_fp16)[name = string("op_325_cast_fp16")]; tensor hidden_states_41_cast_fp16 = mul(x = hidden_states_37_cast_fp16, y = var_325_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323210624)))]; tensor input_9_cast_fp16 = mul(x = layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_41_cast_fp16)[name = string("input_9_cast_fp16")]; tensor layers_0_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_0_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323212736)))]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329504256)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_gate_proj_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_337_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_337_cast_fp16")]; tensor layers_0_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_0_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329510464)))]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_up_proj_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_13_cast_fp16 = mul(x = var_337_cast_fp16, y = linear_5_cast_fp16)[name = string("input_13_cast_fp16")]; tensor layers_0_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_0_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335801984)))]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_mlp_down_proj_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_47_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; int32 var_354 = const()[name = string("op_354"), val = int32(2)]; int32 var_355 = const()[name = string("op_355"), val = int32(-1)]; fp16 var_354_promoted_to_fp16 = const()[name = string("op_354_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_364_cast_fp16 = pow(x = hidden_states_47_cast_fp16, y = var_354_promoted_to_fp16)[name = string("op_364_cast_fp16")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_364_cast_fp16)[name = string("variance_9_cast_fp16")]; fp16 var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_368_cast_fp16 = add(x = variance_9_cast_fp16, y = var_367_to_fp16)[name = string("op_368_cast_fp16")]; fp32 var_369_epsilon_0 = const()[name = string("op_369_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_369_cast_fp16 = rsqrt(epsilon = var_369_epsilon_0, x = var_368_cast_fp16)[name = string("op_369_cast_fp16")]; tensor hidden_states_51_cast_fp16 = mul(x = hidden_states_47_cast_fp16, y = var_369_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342093504)))]; tensor hidden_states_55_cast_fp16 = mul(x = layers_1_input_layernorm_weight_to_fp16, y = hidden_states_51_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; tensor var_382_shape_cast_fp16 = shape(x = hidden_states_55_cast_fp16)[name = string("op_382_shape_cast_fp16")]; int32 gather_16 = const()[name = string("gather_16"), val = int32(1)]; int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)]; int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)]; bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)]; string var_382_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_382_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_17_indices_0_to_uint16 = const()[name = string("gather_17_indices_0_to_uint16"), val = uint16(1)]; tensor var_382_shape_cast_fp16_to_uint16 = cast(dtype = var_382_shape_cast_fp16_to_uint16_dtype_0, x = var_382_shape_cast_fp16)[name = string("cast_387")]; uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = gather_17_indices_0_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_382_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")]; string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342095616)))]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor concat_10x = const()[name = string("concat_10x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_57_cast_fp16 = reshape(shape = concat_10x, x = linear_7_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; fp16 var_354_promoted_1_to_fp16 = const()[name = string("op_354_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_390_cast_fp16 = pow(x = hidden_states_57_cast_fp16, y = var_354_promoted_1_to_fp16)[name = string("op_390_cast_fp16")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_390_cast_fp16)[name = string("variance_11_cast_fp16")]; fp16 var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_394_cast_fp16 = add(x = variance_11_cast_fp16, y = var_393_to_fp16)[name = string("op_394_cast_fp16")]; fp32 var_395_epsilon_0 = const()[name = string("op_395_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_395_cast_fp16 = rsqrt(epsilon = var_395_epsilon_0, x = var_394_cast_fp16)[name = string("op_395_cast_fp16")]; tensor hidden_states_61_cast_fp16 = mul(x = hidden_states_57_cast_fp16, y = var_395_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346289984)))]; tensor var_398_cast_fp16 = mul(x = layers_1_self_attn_q_norm_weight_to_fp16, y = hidden_states_61_cast_fp16)[name = string("op_398_cast_fp16")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346290304)))]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor concat_11x = const()[name = string("concat_11x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_65_cast_fp16 = reshape(shape = concat_11x, x = linear_8_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; fp16 var_354_promoted_2_to_fp16 = const()[name = string("op_354_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_406_cast_fp16 = pow(x = hidden_states_65_cast_fp16, y = var_354_promoted_2_to_fp16)[name = string("op_406_cast_fp16")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_406_cast_fp16)[name = string("variance_13_cast_fp16")]; fp16 var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_410_cast_fp16 = add(x = variance_13_cast_fp16, y = var_409_to_fp16)[name = string("op_410_cast_fp16")]; fp32 var_411_epsilon_0 = const()[name = string("op_411_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_411_cast_fp16 = rsqrt(epsilon = var_411_epsilon_0, x = var_410_cast_fp16)[name = string("op_411_cast_fp16")]; tensor hidden_states_69_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = var_411_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348387520)))]; tensor var_414_cast_fp16 = mul(x = layers_1_self_attn_k_norm_weight_to_fp16, y = hidden_states_69_cast_fp16)[name = string("op_414_cast_fp16")]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348387840)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 8, 128])]; tensor var_419_cast_fp16 = reshape(shape = concat_12x, x = linear_9_cast_fp16)[name = string("op_419_cast_fp16")]; tensor hidden_states_77_perm_0 = const()[name = string("hidden_states_77_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_398_cast_fp16)[name = string("transpose_107")]; tensor var_423_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_5_cast_fp16)[name = string("op_423_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_434_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_434_cast_fp16")]; bool var_436_interleave_0 = const()[name = string("op_436_interleave_0"), val = bool(false)]; tensor var_436_cast_fp16 = concat(axis = var_355, interleave = var_436_interleave_0, values = (var_434_cast_fp16, x1_5_cast_fp16))[name = string("op_436_cast_fp16")]; tensor var_437_cast_fp16 = mul(x = var_436_cast_fp16, y = sin_5_cast_fp16)[name = string("op_437_cast_fp16")]; tensor query_3_cast_fp16 = add(x = var_423_cast_fp16, y = var_437_cast_fp16)[name = string("query_3_cast_fp16")]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_414_cast_fp16)[name = string("transpose_106")]; tensor var_439_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_5_cast_fp16)[name = string("op_439_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_450_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_450_cast_fp16")]; bool var_452_interleave_0 = const()[name = string("op_452_interleave_0"), val = bool(false)]; tensor var_452_cast_fp16 = concat(axis = var_355, interleave = var_452_interleave_0, values = (var_450_cast_fp16, x1_7_cast_fp16))[name = string("op_452_cast_fp16")]; tensor var_453_cast_fp16 = mul(x = var_452_cast_fp16, y = sin_5_cast_fp16)[name = string("op_453_cast_fp16")]; tensor hidden_states_73_cast_fp16 = add(x = var_439_cast_fp16, y = var_453_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; tensor var_455_shape_cast_fp16 = shape(x = hidden_states_73_cast_fp16)[name = string("op_455_shape_cast_fp16")]; int32 gather_22 = const()[name = string("gather_22"), val = int32(1)]; int32 gather_23 = const()[name = string("gather_23"), val = int32(8)]; int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)]; int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)]; bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)]; string var_455_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_455_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_24_indices_0_to_uint16 = const()[name = string("gather_24_indices_0_to_uint16"), val = uint16(2)]; tensor var_455_shape_cast_fp16_to_uint16 = cast(dtype = var_455_shape_cast_fp16_to_uint16_dtype_0, x = var_455_shape_cast_fp16)[name = string("cast_385")]; uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = gather_24_indices_0_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_455_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")]; string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_25 = const()[name = string("gather_25"), val = int32(128)]; tensor var_462_axes_0 = const()[name = string("op_462_axes_0"), val = tensor([2])]; tensor var_462_cast_fp16 = expand_dims(axes = var_462_axes_0, x = hidden_states_73_cast_fp16)[name = string("op_462_cast_fp16")]; int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_384")]; tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_22, gather_23, var_354, gather_24_cast_uint16_to_int32, gather_25))[name = string("concat_13")]; tensor shape_2_cast_fp16 = shape(x = var_462_cast_fp16)[name = string("shape_2_cast_fp16")]; int32 equal_2_y_0 = const()[name = string("equal_2_y_0"), val = int32(-1)]; tensor equal_2 = equal(x = concat_13, y = equal_2_y_0)[name = string("equal_2")]; tensor select_2 = select(a = shape_2_cast_fp16, b = concat_13, cond = equal_2)[name = string("select_2")]; tensor real_div_2 = real_div(x = select_2, y = shape_2_cast_fp16)[name = string("real_div_2")]; tensor hidden_states_75_cast_fp16 = tile(reps = real_div_2, x = var_462_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, 16, -1, 128])]; tensor key_states_3_cast_fp16 = reshape(shape = concat_14x, x = hidden_states_75_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor hidden_states_77_cast_fp16 = transpose(perm = hidden_states_77_perm_0, x = var_419_cast_fp16)[name = string("transpose_105")]; tensor var_472_shape_cast_fp16 = shape(x = hidden_states_77_cast_fp16)[name = string("op_472_shape_cast_fp16")]; int32 gather_26 = const()[name = string("gather_26"), val = int32(1)]; int32 gather_27 = const()[name = string("gather_27"), val = int32(8)]; int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)]; int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)]; bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)]; string var_472_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_472_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_28_indices_0_to_uint16 = const()[name = string("gather_28_indices_0_to_uint16"), val = uint16(2)]; tensor var_472_shape_cast_fp16_to_uint16 = cast(dtype = var_472_shape_cast_fp16_to_uint16_dtype_0, x = var_472_shape_cast_fp16)[name = string("cast_383")]; uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = gather_28_indices_0_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_472_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")]; string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_29 = const()[name = string("gather_29"), val = int32(128)]; tensor var_479_axes_0 = const()[name = string("op_479_axes_0"), val = tensor([2])]; tensor var_479_cast_fp16 = expand_dims(axes = var_479_axes_0, x = hidden_states_77_cast_fp16)[name = string("op_479_cast_fp16")]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_382")]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_26, gather_27, var_354, gather_28_cast_uint16_to_int32, gather_29))[name = string("concat_15")]; tensor shape_3_cast_fp16 = shape(x = var_479_cast_fp16)[name = string("shape_3_cast_fp16")]; int32 equal_3_y_0 = const()[name = string("equal_3_y_0"), val = int32(-1)]; tensor equal_3 = equal(x = concat_15, y = equal_3_y_0)[name = string("equal_3")]; tensor select_3 = select(a = shape_3_cast_fp16, b = concat_15, cond = equal_3)[name = string("select_3")]; tensor real_div_3 = real_div(x = select_3, y = shape_3_cast_fp16)[name = string("real_div_3")]; tensor hidden_states_79_cast_fp16 = tile(reps = real_div_3, x = var_479_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, 16, -1, 128])]; tensor value_states_3_cast_fp16 = reshape(shape = concat_16x, x = hidden_states_79_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_490_transpose_x_1 = const()[name = string("op_490_transpose_x_1"), val = bool(false)]; bool var_490_transpose_y_1 = const()[name = string("op_490_transpose_y_1"), val = bool(true)]; tensor var_490_cast_fp16 = matmul(transpose_x = var_490_transpose_x_1, transpose_y = var_490_transpose_y_1, x = query_3_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_490_cast_fp16")]; fp16 var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_5_cast_fp16 = mul(x = var_490_cast_fp16, y = var_491_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor input_15_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_494_cast_fp16 = softmax(axis = var_355, x = input_15_cast_fp16)[name = string("op_494_cast_fp16")]; bool attn_output_5_transpose_x_0 = const()[name = string("attn_output_5_transpose_x_0"), val = bool(false)]; bool attn_output_5_transpose_y_0 = const()[name = string("attn_output_5_transpose_y_0"), val = bool(false)]; tensor attn_output_5_cast_fp16 = matmul(transpose_x = attn_output_5_transpose_x_0, transpose_y = attn_output_5_transpose_y_0, x = var_494_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_498_perm_0 = const()[name = string("op_498_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_17_axis_0 = const()[name = string("concat_17_axis_0"), val = int32(0)]; bool concat_17_interleave_0 = const()[name = string("concat_17_interleave_0"), val = bool(false)]; int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_386")]; tensor concat_17 = concat(axis = concat_17_axis_0, interleave = concat_17_interleave_0, values = (gather_16, gather_17_cast_uint16_to_int32, var_355))[name = string("concat_17")]; tensor var_498_cast_fp16 = transpose(perm = var_498_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_104")]; tensor var_501_cast_fp16 = reshape(shape = concat_17, x = var_498_cast_fp16)[name = string("op_501_cast_fp16")]; tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350485056)))]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = var_501_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor hidden_states_83_cast_fp16 = add(x = hidden_states_47_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; fp16 var_354_promoted_3_to_fp16 = const()[name = string("op_354_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_508_cast_fp16 = pow(x = hidden_states_83_cast_fp16, y = var_354_promoted_3_to_fp16)[name = string("op_508_cast_fp16")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_508_cast_fp16)[name = string("variance_15_cast_fp16")]; fp16 var_511_to_fp16 = const()[name = string("op_511_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_512_cast_fp16 = add(x = variance_15_cast_fp16, y = var_511_to_fp16)[name = string("op_512_cast_fp16")]; fp32 var_513_epsilon_0 = const()[name = string("op_513_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_513_cast_fp16 = rsqrt(epsilon = var_513_epsilon_0, x = var_512_cast_fp16)[name = string("op_513_cast_fp16")]; tensor hidden_states_87_cast_fp16 = mul(x = hidden_states_83_cast_fp16, y = var_513_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354679424)))]; tensor input_21_cast_fp16 = mul(x = layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_87_cast_fp16)[name = string("input_21_cast_fp16")]; tensor layers_1_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_1_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354681536)))]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_gate_proj_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_525_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_525_cast_fp16")]; tensor layers_1_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_1_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360973056)))]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_up_proj_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_25_cast_fp16 = mul(x = var_525_cast_fp16, y = linear_12_cast_fp16)[name = string("input_25_cast_fp16")]; tensor layers_1_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_1_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367264576)))]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_mlp_down_proj_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor hidden_states_93_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; int32 var_542 = const()[name = string("op_542"), val = int32(2)]; int32 var_543 = const()[name = string("op_543"), val = int32(-1)]; fp16 var_542_promoted_to_fp16 = const()[name = string("op_542_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_552_cast_fp16 = pow(x = hidden_states_93_cast_fp16, y = var_542_promoted_to_fp16)[name = string("op_552_cast_fp16")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_552_cast_fp16)[name = string("variance_17_cast_fp16")]; fp16 var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_556_cast_fp16 = add(x = variance_17_cast_fp16, y = var_555_to_fp16)[name = string("op_556_cast_fp16")]; fp32 var_557_epsilon_0 = const()[name = string("op_557_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_557_cast_fp16 = rsqrt(epsilon = var_557_epsilon_0, x = var_556_cast_fp16)[name = string("op_557_cast_fp16")]; tensor hidden_states_97_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = var_557_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; tensor layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373556096)))]; tensor hidden_states_101_cast_fp16 = mul(x = layers_2_input_layernorm_weight_to_fp16, y = hidden_states_97_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor var_570_shape_cast_fp16 = shape(x = hidden_states_101_cast_fp16)[name = string("op_570_shape_cast_fp16")]; int32 gather_30 = const()[name = string("gather_30"), val = int32(1)]; int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)]; int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)]; bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)]; string var_570_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_570_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_31_indices_0_to_uint16 = const()[name = string("gather_31_indices_0_to_uint16"), val = uint16(1)]; tensor var_570_shape_cast_fp16_to_uint16 = cast(dtype = var_570_shape_cast_fp16_to_uint16_dtype_0, x = var_570_shape_cast_fp16)[name = string("cast_381")]; uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = gather_31_indices_0_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_570_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")]; string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373558208)))]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = hidden_states_101_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor concat_18x = const()[name = string("concat_18x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_103_cast_fp16 = reshape(shape = concat_18x, x = linear_14_cast_fp16)[name = string("hidden_states_103_cast_fp16")]; fp16 var_542_promoted_1_to_fp16 = const()[name = string("op_542_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_578_cast_fp16 = pow(x = hidden_states_103_cast_fp16, y = var_542_promoted_1_to_fp16)[name = string("op_578_cast_fp16")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_578_cast_fp16)[name = string("variance_19_cast_fp16")]; fp16 var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_582_cast_fp16 = add(x = variance_19_cast_fp16, y = var_581_to_fp16)[name = string("op_582_cast_fp16")]; fp32 var_583_epsilon_0 = const()[name = string("op_583_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_583_cast_fp16 = rsqrt(epsilon = var_583_epsilon_0, x = var_582_cast_fp16)[name = string("op_583_cast_fp16")]; tensor hidden_states_107_cast_fp16 = mul(x = hidden_states_103_cast_fp16, y = var_583_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377752576)))]; tensor var_586_cast_fp16 = mul(x = layers_2_self_attn_q_norm_weight_to_fp16, y = hidden_states_107_cast_fp16)[name = string("op_586_cast_fp16")]; tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377752896)))]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = hidden_states_101_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor concat_19x = const()[name = string("concat_19x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_111_cast_fp16 = reshape(shape = concat_19x, x = linear_15_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; fp16 var_542_promoted_2_to_fp16 = const()[name = string("op_542_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_594_cast_fp16 = pow(x = hidden_states_111_cast_fp16, y = var_542_promoted_2_to_fp16)[name = string("op_594_cast_fp16")]; tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_594_cast_fp16)[name = string("variance_21_cast_fp16")]; fp16 var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_598_cast_fp16 = add(x = variance_21_cast_fp16, y = var_597_to_fp16)[name = string("op_598_cast_fp16")]; fp32 var_599_epsilon_0 = const()[name = string("op_599_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_599_cast_fp16 = rsqrt(epsilon = var_599_epsilon_0, x = var_598_cast_fp16)[name = string("op_599_cast_fp16")]; tensor hidden_states_115_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = var_599_cast_fp16)[name = string("hidden_states_115_cast_fp16")]; tensor layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379850112)))]; tensor var_602_cast_fp16 = mul(x = layers_2_self_attn_k_norm_weight_to_fp16, y = hidden_states_115_cast_fp16)[name = string("op_602_cast_fp16")]; tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379850432)))]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = hidden_states_101_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 8, 128])]; tensor var_607_cast_fp16 = reshape(shape = concat_20x, x = linear_16_cast_fp16)[name = string("op_607_cast_fp16")]; tensor hidden_states_123_perm_0 = const()[name = string("hidden_states_123_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_586_cast_fp16)[name = string("transpose_103")]; tensor var_611_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_5_cast_fp16)[name = string("op_611_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_622_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_622_cast_fp16")]; bool var_624_interleave_0 = const()[name = string("op_624_interleave_0"), val = bool(false)]; tensor var_624_cast_fp16 = concat(axis = var_543, interleave = var_624_interleave_0, values = (var_622_cast_fp16, x1_9_cast_fp16))[name = string("op_624_cast_fp16")]; tensor var_625_cast_fp16 = mul(x = var_624_cast_fp16, y = sin_5_cast_fp16)[name = string("op_625_cast_fp16")]; tensor query_5_cast_fp16 = add(x = var_611_cast_fp16, y = var_625_cast_fp16)[name = string("query_5_cast_fp16")]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_602_cast_fp16)[name = string("transpose_102")]; tensor var_627_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_5_cast_fp16)[name = string("op_627_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_638_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_638_cast_fp16")]; bool var_640_interleave_0 = const()[name = string("op_640_interleave_0"), val = bool(false)]; tensor var_640_cast_fp16 = concat(axis = var_543, interleave = var_640_interleave_0, values = (var_638_cast_fp16, x1_11_cast_fp16))[name = string("op_640_cast_fp16")]; tensor var_641_cast_fp16 = mul(x = var_640_cast_fp16, y = sin_5_cast_fp16)[name = string("op_641_cast_fp16")]; tensor hidden_states_119_cast_fp16 = add(x = var_627_cast_fp16, y = var_641_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor var_643_shape_cast_fp16 = shape(x = hidden_states_119_cast_fp16)[name = string("op_643_shape_cast_fp16")]; int32 gather_36 = const()[name = string("gather_36"), val = int32(1)]; int32 gather_37 = const()[name = string("gather_37"), val = int32(8)]; int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; string var_643_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_643_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_38_indices_0_to_uint16 = const()[name = string("gather_38_indices_0_to_uint16"), val = uint16(2)]; tensor var_643_shape_cast_fp16_to_uint16 = cast(dtype = var_643_shape_cast_fp16_to_uint16_dtype_0, x = var_643_shape_cast_fp16)[name = string("cast_379")]; uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = gather_38_indices_0_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_643_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_39 = const()[name = string("gather_39"), val = int32(128)]; tensor var_650_axes_0 = const()[name = string("op_650_axes_0"), val = tensor([2])]; tensor var_650_cast_fp16 = expand_dims(axes = var_650_axes_0, x = hidden_states_119_cast_fp16)[name = string("op_650_cast_fp16")]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_378")]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (gather_36, gather_37, var_542, gather_38_cast_uint16_to_int32, gather_39))[name = string("concat_21")]; tensor shape_4_cast_fp16 = shape(x = var_650_cast_fp16)[name = string("shape_4_cast_fp16")]; int32 equal_4_y_0 = const()[name = string("equal_4_y_0"), val = int32(-1)]; tensor equal_4 = equal(x = concat_21, y = equal_4_y_0)[name = string("equal_4")]; tensor select_4 = select(a = shape_4_cast_fp16, b = concat_21, cond = equal_4)[name = string("select_4")]; tensor real_div_4 = real_div(x = select_4, y = shape_4_cast_fp16)[name = string("real_div_4")]; tensor hidden_states_121_cast_fp16 = tile(reps = real_div_4, x = var_650_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, 16, -1, 128])]; tensor key_states_5_cast_fp16 = reshape(shape = concat_22x, x = hidden_states_121_cast_fp16)[name = string("key_states_5_cast_fp16")]; tensor hidden_states_123_cast_fp16 = transpose(perm = hidden_states_123_perm_0, x = var_607_cast_fp16)[name = string("transpose_101")]; tensor var_660_shape_cast_fp16 = shape(x = hidden_states_123_cast_fp16)[name = string("op_660_shape_cast_fp16")]; int32 gather_40 = const()[name = string("gather_40"), val = int32(1)]; int32 gather_41 = const()[name = string("gather_41"), val = int32(8)]; int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)]; int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)]; bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)]; string var_660_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_660_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_42_indices_0_to_uint16 = const()[name = string("gather_42_indices_0_to_uint16"), val = uint16(2)]; tensor var_660_shape_cast_fp16_to_uint16 = cast(dtype = var_660_shape_cast_fp16_to_uint16_dtype_0, x = var_660_shape_cast_fp16)[name = string("cast_377")]; uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = gather_42_indices_0_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_660_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")]; string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_43 = const()[name = string("gather_43"), val = int32(128)]; tensor var_667_axes_0 = const()[name = string("op_667_axes_0"), val = tensor([2])]; tensor var_667_cast_fp16 = expand_dims(axes = var_667_axes_0, x = hidden_states_123_cast_fp16)[name = string("op_667_cast_fp16")]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_376")]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (gather_40, gather_41, var_542, gather_42_cast_uint16_to_int32, gather_43))[name = string("concat_23")]; tensor shape_5_cast_fp16 = shape(x = var_667_cast_fp16)[name = string("shape_5_cast_fp16")]; int32 equal_5_y_0 = const()[name = string("equal_5_y_0"), val = int32(-1)]; tensor equal_5 = equal(x = concat_23, y = equal_5_y_0)[name = string("equal_5")]; tensor select_5 = select(a = shape_5_cast_fp16, b = concat_23, cond = equal_5)[name = string("select_5")]; tensor real_div_5 = real_div(x = select_5, y = shape_5_cast_fp16)[name = string("real_div_5")]; tensor hidden_states_125_cast_fp16 = tile(reps = real_div_5, x = var_667_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor concat_24x = const()[name = string("concat_24x"), val = tensor([1, 16, -1, 128])]; tensor value_states_5_cast_fp16 = reshape(shape = concat_24x, x = hidden_states_125_cast_fp16)[name = string("value_states_5_cast_fp16")]; bool var_678_transpose_x_1 = const()[name = string("op_678_transpose_x_1"), val = bool(false)]; bool var_678_transpose_y_1 = const()[name = string("op_678_transpose_y_1"), val = bool(true)]; tensor var_678_cast_fp16 = matmul(transpose_x = var_678_transpose_x_1, transpose_y = var_678_transpose_y_1, x = query_5_cast_fp16, y = key_states_5_cast_fp16)[name = string("op_678_cast_fp16")]; fp16 var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_9_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor input_27_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_27_cast_fp16")]; tensor var_682_cast_fp16 = softmax(axis = var_543, x = input_27_cast_fp16)[name = string("op_682_cast_fp16")]; bool attn_output_9_transpose_x_0 = const()[name = string("attn_output_9_transpose_x_0"), val = bool(false)]; bool attn_output_9_transpose_y_0 = const()[name = string("attn_output_9_transpose_y_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = var_682_cast_fp16, y = value_states_5_cast_fp16)[name = string("attn_output_9_cast_fp16")]; tensor var_686_perm_0 = const()[name = string("op_686_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_380")]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (gather_30, gather_31_cast_uint16_to_int32, var_543))[name = string("concat_25")]; tensor var_686_cast_fp16 = transpose(perm = var_686_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_100")]; tensor var_689_cast_fp16 = reshape(shape = concat_25, x = var_686_cast_fp16)[name = string("op_689_cast_fp16")]; tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381947648)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = var_689_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor hidden_states_129_cast_fp16 = add(x = hidden_states_93_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; fp16 var_542_promoted_3_to_fp16 = const()[name = string("op_542_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_696_cast_fp16 = pow(x = hidden_states_129_cast_fp16, y = var_542_promoted_3_to_fp16)[name = string("op_696_cast_fp16")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_696_cast_fp16)[name = string("variance_23_cast_fp16")]; fp16 var_699_to_fp16 = const()[name = string("op_699_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_700_cast_fp16 = add(x = variance_23_cast_fp16, y = var_699_to_fp16)[name = string("op_700_cast_fp16")]; fp32 var_701_epsilon_0 = const()[name = string("op_701_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_701_cast_fp16 = rsqrt(epsilon = var_701_epsilon_0, x = var_700_cast_fp16)[name = string("op_701_cast_fp16")]; tensor hidden_states_133_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = var_701_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; tensor layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386142016)))]; tensor input_33_cast_fp16 = mul(x = layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_133_cast_fp16)[name = string("input_33_cast_fp16")]; tensor layers_2_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_2_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386144128)))]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_gate_proj_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_713_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_713_cast_fp16")]; tensor layers_2_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_2_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392435648)))]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_up_proj_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_37_cast_fp16 = mul(x = var_713_cast_fp16, y = linear_19_cast_fp16)[name = string("input_37_cast_fp16")]; tensor layers_2_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_2_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398727168)))]; tensor linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_mlp_down_proj_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor hidden_states_139_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; int32 var_730 = const()[name = string("op_730"), val = int32(2)]; int32 var_731 = const()[name = string("op_731"), val = int32(-1)]; fp16 var_730_promoted_to_fp16 = const()[name = string("op_730_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_740_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_730_promoted_to_fp16)[name = string("op_740_cast_fp16")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_740_cast_fp16)[name = string("variance_25_cast_fp16")]; fp16 var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_744_cast_fp16 = add(x = variance_25_cast_fp16, y = var_743_to_fp16)[name = string("op_744_cast_fp16")]; fp32 var_745_epsilon_0 = const()[name = string("op_745_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_745_cast_fp16 = rsqrt(epsilon = var_745_epsilon_0, x = var_744_cast_fp16)[name = string("op_745_cast_fp16")]; tensor hidden_states_143_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_745_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; tensor layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405018688)))]; tensor hidden_states_147_cast_fp16 = mul(x = layers_3_input_layernorm_weight_to_fp16, y = hidden_states_143_cast_fp16)[name = string("hidden_states_147_cast_fp16")]; tensor var_758_shape_cast_fp16 = shape(x = hidden_states_147_cast_fp16)[name = string("op_758_shape_cast_fp16")]; int32 gather_44 = const()[name = string("gather_44"), val = int32(1)]; int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)]; int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)]; bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)]; string var_758_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_758_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_45_indices_0_to_uint16 = const()[name = string("gather_45_indices_0_to_uint16"), val = uint16(1)]; tensor var_758_shape_cast_fp16_to_uint16 = cast(dtype = var_758_shape_cast_fp16_to_uint16_dtype_0, x = var_758_shape_cast_fp16)[name = string("cast_375")]; uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = gather_45_indices_0_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_758_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")]; string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405020800)))]; tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = hidden_states_147_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor concat_26x = const()[name = string("concat_26x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_149_cast_fp16 = reshape(shape = concat_26x, x = linear_21_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; fp16 var_730_promoted_1_to_fp16 = const()[name = string("op_730_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_766_cast_fp16 = pow(x = hidden_states_149_cast_fp16, y = var_730_promoted_1_to_fp16)[name = string("op_766_cast_fp16")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_766_cast_fp16)[name = string("variance_27_cast_fp16")]; fp16 var_769_to_fp16 = const()[name = string("op_769_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_770_cast_fp16 = add(x = variance_27_cast_fp16, y = var_769_to_fp16)[name = string("op_770_cast_fp16")]; fp32 var_771_epsilon_0 = const()[name = string("op_771_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_771_cast_fp16 = rsqrt(epsilon = var_771_epsilon_0, x = var_770_cast_fp16)[name = string("op_771_cast_fp16")]; tensor hidden_states_153_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = var_771_cast_fp16)[name = string("hidden_states_153_cast_fp16")]; tensor layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409215168)))]; tensor var_774_cast_fp16 = mul(x = layers_3_self_attn_q_norm_weight_to_fp16, y = hidden_states_153_cast_fp16)[name = string("op_774_cast_fp16")]; tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409215488)))]; tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = hidden_states_147_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor concat_27x = const()[name = string("concat_27x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_157_cast_fp16 = reshape(shape = concat_27x, x = linear_22_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; fp16 var_730_promoted_2_to_fp16 = const()[name = string("op_730_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_782_cast_fp16 = pow(x = hidden_states_157_cast_fp16, y = var_730_promoted_2_to_fp16)[name = string("op_782_cast_fp16")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_782_cast_fp16)[name = string("variance_29_cast_fp16")]; fp16 var_785_to_fp16 = const()[name = string("op_785_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_786_cast_fp16 = add(x = variance_29_cast_fp16, y = var_785_to_fp16)[name = string("op_786_cast_fp16")]; fp32 var_787_epsilon_0 = const()[name = string("op_787_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_787_cast_fp16 = rsqrt(epsilon = var_787_epsilon_0, x = var_786_cast_fp16)[name = string("op_787_cast_fp16")]; tensor hidden_states_161_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = var_787_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411312704)))]; tensor var_790_cast_fp16 = mul(x = layers_3_self_attn_k_norm_weight_to_fp16, y = hidden_states_161_cast_fp16)[name = string("op_790_cast_fp16")]; tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411313024)))]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = hidden_states_147_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor concat_28x = const()[name = string("concat_28x"), val = tensor([1, -1, 8, 128])]; tensor var_795_cast_fp16 = reshape(shape = concat_28x, x = linear_23_cast_fp16)[name = string("op_795_cast_fp16")]; tensor hidden_states_169_perm_0 = const()[name = string("hidden_states_169_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_774_cast_fp16)[name = string("transpose_99")]; tensor var_799_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_5_cast_fp16)[name = string("op_799_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_810_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_810_cast_fp16")]; bool var_812_interleave_0 = const()[name = string("op_812_interleave_0"), val = bool(false)]; tensor var_812_cast_fp16 = concat(axis = var_731, interleave = var_812_interleave_0, values = (var_810_cast_fp16, x1_13_cast_fp16))[name = string("op_812_cast_fp16")]; tensor var_813_cast_fp16 = mul(x = var_812_cast_fp16, y = sin_5_cast_fp16)[name = string("op_813_cast_fp16")]; tensor query_7_cast_fp16 = add(x = var_799_cast_fp16, y = var_813_cast_fp16)[name = string("query_7_cast_fp16")]; tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_790_cast_fp16)[name = string("transpose_98")]; tensor var_815_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_5_cast_fp16)[name = string("op_815_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_826_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_826_cast_fp16")]; bool var_828_interleave_0 = const()[name = string("op_828_interleave_0"), val = bool(false)]; tensor var_828_cast_fp16 = concat(axis = var_731, interleave = var_828_interleave_0, values = (var_826_cast_fp16, x1_15_cast_fp16))[name = string("op_828_cast_fp16")]; tensor var_829_cast_fp16 = mul(x = var_828_cast_fp16, y = sin_5_cast_fp16)[name = string("op_829_cast_fp16")]; tensor hidden_states_165_cast_fp16 = add(x = var_815_cast_fp16, y = var_829_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; tensor var_831_shape_cast_fp16 = shape(x = hidden_states_165_cast_fp16)[name = string("op_831_shape_cast_fp16")]; int32 gather_50 = const()[name = string("gather_50"), val = int32(1)]; int32 gather_51 = const()[name = string("gather_51"), val = int32(8)]; int32 gather_52_axis_0 = const()[name = string("gather_52_axis_0"), val = int32(0)]; int32 gather_52_batch_dims_0 = const()[name = string("gather_52_batch_dims_0"), val = int32(0)]; bool gather_52_validate_indices_0 = const()[name = string("gather_52_validate_indices_0"), val = bool(false)]; string var_831_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_831_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_52_indices_0_to_uint16 = const()[name = string("gather_52_indices_0_to_uint16"), val = uint16(2)]; tensor var_831_shape_cast_fp16_to_uint16 = cast(dtype = var_831_shape_cast_fp16_to_uint16_dtype_0, x = var_831_shape_cast_fp16)[name = string("cast_373")]; uint16 gather_52_cast_uint16 = gather(axis = gather_52_axis_0, batch_dims = gather_52_batch_dims_0, indices = gather_52_indices_0_to_uint16, validate_indices = gather_52_validate_indices_0, x = var_831_shape_cast_fp16_to_uint16)[name = string("gather_52_cast_uint16")]; string gather_52_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_52_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_53 = const()[name = string("gather_53"), val = int32(128)]; tensor var_838_axes_0 = const()[name = string("op_838_axes_0"), val = tensor([2])]; tensor var_838_cast_fp16 = expand_dims(axes = var_838_axes_0, x = hidden_states_165_cast_fp16)[name = string("op_838_cast_fp16")]; int32 concat_29_axis_0 = const()[name = string("concat_29_axis_0"), val = int32(0)]; bool concat_29_interleave_0 = const()[name = string("concat_29_interleave_0"), val = bool(false)]; int32 gather_52_cast_uint16_to_int32 = cast(dtype = gather_52_cast_uint16_to_int32_dtype_0, x = gather_52_cast_uint16)[name = string("cast_372")]; tensor concat_29 = concat(axis = concat_29_axis_0, interleave = concat_29_interleave_0, values = (gather_50, gather_51, var_730, gather_52_cast_uint16_to_int32, gather_53))[name = string("concat_29")]; tensor shape_6_cast_fp16 = shape(x = var_838_cast_fp16)[name = string("shape_6_cast_fp16")]; int32 equal_6_y_0 = const()[name = string("equal_6_y_0"), val = int32(-1)]; tensor equal_6 = equal(x = concat_29, y = equal_6_y_0)[name = string("equal_6")]; tensor select_6 = select(a = shape_6_cast_fp16, b = concat_29, cond = equal_6)[name = string("select_6")]; tensor real_div_6 = real_div(x = select_6, y = shape_6_cast_fp16)[name = string("real_div_6")]; tensor hidden_states_167_cast_fp16 = tile(reps = real_div_6, x = var_838_cast_fp16)[name = string("hidden_states_167_cast_fp16")]; tensor concat_30x = const()[name = string("concat_30x"), val = tensor([1, 16, -1, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = concat_30x, x = hidden_states_167_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor hidden_states_169_cast_fp16 = transpose(perm = hidden_states_169_perm_0, x = var_795_cast_fp16)[name = string("transpose_97")]; tensor var_848_shape_cast_fp16 = shape(x = hidden_states_169_cast_fp16)[name = string("op_848_shape_cast_fp16")]; int32 gather_54 = const()[name = string("gather_54"), val = int32(1)]; int32 gather_55 = const()[name = string("gather_55"), val = int32(8)]; int32 gather_56_axis_0 = const()[name = string("gather_56_axis_0"), val = int32(0)]; int32 gather_56_batch_dims_0 = const()[name = string("gather_56_batch_dims_0"), val = int32(0)]; bool gather_56_validate_indices_0 = const()[name = string("gather_56_validate_indices_0"), val = bool(false)]; string var_848_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_848_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_56_indices_0_to_uint16 = const()[name = string("gather_56_indices_0_to_uint16"), val = uint16(2)]; tensor var_848_shape_cast_fp16_to_uint16 = cast(dtype = var_848_shape_cast_fp16_to_uint16_dtype_0, x = var_848_shape_cast_fp16)[name = string("cast_371")]; uint16 gather_56_cast_uint16 = gather(axis = gather_56_axis_0, batch_dims = gather_56_batch_dims_0, indices = gather_56_indices_0_to_uint16, validate_indices = gather_56_validate_indices_0, x = var_848_shape_cast_fp16_to_uint16)[name = string("gather_56_cast_uint16")]; string gather_56_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_56_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_57 = const()[name = string("gather_57"), val = int32(128)]; tensor var_855_axes_0 = const()[name = string("op_855_axes_0"), val = tensor([2])]; tensor var_855_cast_fp16 = expand_dims(axes = var_855_axes_0, x = hidden_states_169_cast_fp16)[name = string("op_855_cast_fp16")]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; int32 gather_56_cast_uint16_to_int32 = cast(dtype = gather_56_cast_uint16_to_int32_dtype_0, x = gather_56_cast_uint16)[name = string("cast_370")]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (gather_54, gather_55, var_730, gather_56_cast_uint16_to_int32, gather_57))[name = string("concat_31")]; tensor shape_7_cast_fp16 = shape(x = var_855_cast_fp16)[name = string("shape_7_cast_fp16")]; int32 equal_7_y_0 = const()[name = string("equal_7_y_0"), val = int32(-1)]; tensor equal_7 = equal(x = concat_31, y = equal_7_y_0)[name = string("equal_7")]; tensor select_7 = select(a = shape_7_cast_fp16, b = concat_31, cond = equal_7)[name = string("select_7")]; tensor real_div_7 = real_div(x = select_7, y = shape_7_cast_fp16)[name = string("real_div_7")]; tensor hidden_states_171_cast_fp16 = tile(reps = real_div_7, x = var_855_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; tensor concat_32x = const()[name = string("concat_32x"), val = tensor([1, 16, -1, 128])]; tensor value_states_7_cast_fp16 = reshape(shape = concat_32x, x = hidden_states_171_cast_fp16)[name = string("value_states_7_cast_fp16")]; bool var_866_transpose_x_1 = const()[name = string("op_866_transpose_x_1"), val = bool(false)]; bool var_866_transpose_y_1 = const()[name = string("op_866_transpose_y_1"), val = bool(true)]; tensor var_866_cast_fp16 = matmul(transpose_x = var_866_transpose_x_1, transpose_y = var_866_transpose_y_1, x = query_7_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_866_cast_fp16")]; fp16 var_867_to_fp16 = const()[name = string("op_867_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_866_cast_fp16, y = var_867_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor input_39_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_39_cast_fp16")]; tensor var_870_cast_fp16 = softmax(axis = var_731, x = input_39_cast_fp16)[name = string("op_870_cast_fp16")]; bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_870_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_874_perm_0 = const()[name = string("op_874_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_374")]; tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (gather_44, gather_45_cast_uint16_to_int32, var_731))[name = string("concat_33")]; tensor var_874_cast_fp16 = transpose(perm = var_874_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_96")]; tensor var_877_cast_fp16 = reshape(shape = concat_33, x = var_874_cast_fp16)[name = string("op_877_cast_fp16")]; tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413410240)))]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = var_877_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor hidden_states_175_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; fp16 var_730_promoted_3_to_fp16 = const()[name = string("op_730_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_884_cast_fp16 = pow(x = hidden_states_175_cast_fp16, y = var_730_promoted_3_to_fp16)[name = string("op_884_cast_fp16")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_884_cast_fp16)[name = string("variance_31_cast_fp16")]; fp16 var_887_to_fp16 = const()[name = string("op_887_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_888_cast_fp16 = add(x = variance_31_cast_fp16, y = var_887_to_fp16)[name = string("op_888_cast_fp16")]; fp32 var_889_epsilon_0 = const()[name = string("op_889_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_889_cast_fp16 = rsqrt(epsilon = var_889_epsilon_0, x = var_888_cast_fp16)[name = string("op_889_cast_fp16")]; tensor hidden_states_179_cast_fp16 = mul(x = hidden_states_175_cast_fp16, y = var_889_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; tensor layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417604608)))]; tensor input_45_cast_fp16 = mul(x = layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_179_cast_fp16)[name = string("input_45_cast_fp16")]; tensor layers_3_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_3_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417606720)))]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_gate_proj_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_901_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_901_cast_fp16")]; tensor layers_3_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_3_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423898240)))]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_up_proj_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_49_cast_fp16 = mul(x = var_901_cast_fp16, y = linear_26_cast_fp16)[name = string("input_49_cast_fp16")]; tensor layers_3_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_3_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430189760)))]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_mlp_down_proj_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor hidden_states_185_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; int32 var_918 = const()[name = string("op_918"), val = int32(2)]; int32 var_919 = const()[name = string("op_919"), val = int32(-1)]; fp16 var_918_promoted_to_fp16 = const()[name = string("op_918_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_928_cast_fp16 = pow(x = hidden_states_185_cast_fp16, y = var_918_promoted_to_fp16)[name = string("op_928_cast_fp16")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_928_cast_fp16)[name = string("variance_33_cast_fp16")]; fp16 var_931_to_fp16 = const()[name = string("op_931_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_932_cast_fp16 = add(x = variance_33_cast_fp16, y = var_931_to_fp16)[name = string("op_932_cast_fp16")]; fp32 var_933_epsilon_0 = const()[name = string("op_933_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_933_cast_fp16 = rsqrt(epsilon = var_933_epsilon_0, x = var_932_cast_fp16)[name = string("op_933_cast_fp16")]; tensor hidden_states_189_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = var_933_cast_fp16)[name = string("hidden_states_189_cast_fp16")]; tensor layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436481280)))]; tensor hidden_states_193_cast_fp16 = mul(x = layers_4_input_layernorm_weight_to_fp16, y = hidden_states_189_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; tensor var_946_shape_cast_fp16 = shape(x = hidden_states_193_cast_fp16)[name = string("op_946_shape_cast_fp16")]; int32 gather_58 = const()[name = string("gather_58"), val = int32(1)]; int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; string var_946_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_946_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_59_indices_0_to_uint16 = const()[name = string("gather_59_indices_0_to_uint16"), val = uint16(1)]; tensor var_946_shape_cast_fp16_to_uint16 = cast(dtype = var_946_shape_cast_fp16_to_uint16_dtype_0, x = var_946_shape_cast_fp16)[name = string("cast_369")]; uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = gather_59_indices_0_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_946_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436483392)))]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = hidden_states_193_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_195_cast_fp16 = reshape(shape = concat_34x, x = linear_28_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; fp16 var_918_promoted_1_to_fp16 = const()[name = string("op_918_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_954_cast_fp16 = pow(x = hidden_states_195_cast_fp16, y = var_918_promoted_1_to_fp16)[name = string("op_954_cast_fp16")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_954_cast_fp16)[name = string("variance_35_cast_fp16")]; fp16 var_957_to_fp16 = const()[name = string("op_957_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_958_cast_fp16 = add(x = variance_35_cast_fp16, y = var_957_to_fp16)[name = string("op_958_cast_fp16")]; fp32 var_959_epsilon_0 = const()[name = string("op_959_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_959_cast_fp16 = rsqrt(epsilon = var_959_epsilon_0, x = var_958_cast_fp16)[name = string("op_959_cast_fp16")]; tensor hidden_states_199_cast_fp16 = mul(x = hidden_states_195_cast_fp16, y = var_959_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; tensor layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440677760)))]; tensor var_962_cast_fp16 = mul(x = layers_4_self_attn_q_norm_weight_to_fp16, y = hidden_states_199_cast_fp16)[name = string("op_962_cast_fp16")]; tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440678080)))]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = hidden_states_193_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_203_cast_fp16 = reshape(shape = concat_35x, x = linear_29_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; fp16 var_918_promoted_2_to_fp16 = const()[name = string("op_918_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_970_cast_fp16 = pow(x = hidden_states_203_cast_fp16, y = var_918_promoted_2_to_fp16)[name = string("op_970_cast_fp16")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_970_cast_fp16)[name = string("variance_37_cast_fp16")]; fp16 var_973_to_fp16 = const()[name = string("op_973_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_974_cast_fp16 = add(x = variance_37_cast_fp16, y = var_973_to_fp16)[name = string("op_974_cast_fp16")]; fp32 var_975_epsilon_0 = const()[name = string("op_975_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_975_cast_fp16 = rsqrt(epsilon = var_975_epsilon_0, x = var_974_cast_fp16)[name = string("op_975_cast_fp16")]; tensor hidden_states_207_cast_fp16 = mul(x = hidden_states_203_cast_fp16, y = var_975_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; tensor layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442775296)))]; tensor var_978_cast_fp16 = mul(x = layers_4_self_attn_k_norm_weight_to_fp16, y = hidden_states_207_cast_fp16)[name = string("op_978_cast_fp16")]; tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442775616)))]; tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = hidden_states_193_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 8, 128])]; tensor var_983_cast_fp16 = reshape(shape = concat_36x, x = linear_30_cast_fp16)[name = string("op_983_cast_fp16")]; tensor hidden_states_215_perm_0 = const()[name = string("hidden_states_215_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_962_cast_fp16)[name = string("transpose_95")]; tensor var_987_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_5_cast_fp16)[name = string("op_987_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_998_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_998_cast_fp16")]; bool var_1000_interleave_0 = const()[name = string("op_1000_interleave_0"), val = bool(false)]; tensor var_1000_cast_fp16 = concat(axis = var_919, interleave = var_1000_interleave_0, values = (var_998_cast_fp16, x1_17_cast_fp16))[name = string("op_1000_cast_fp16")]; tensor var_1001_cast_fp16 = mul(x = var_1000_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1001_cast_fp16")]; tensor query_9_cast_fp16 = add(x = var_987_cast_fp16, y = var_1001_cast_fp16)[name = string("query_9_cast_fp16")]; tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_978_cast_fp16)[name = string("transpose_94")]; tensor var_1003_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1003_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1014_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1014_cast_fp16")]; bool var_1016_interleave_0 = const()[name = string("op_1016_interleave_0"), val = bool(false)]; tensor var_1016_cast_fp16 = concat(axis = var_919, interleave = var_1016_interleave_0, values = (var_1014_cast_fp16, x1_19_cast_fp16))[name = string("op_1016_cast_fp16")]; tensor var_1017_cast_fp16 = mul(x = var_1016_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1017_cast_fp16")]; tensor hidden_states_211_cast_fp16 = add(x = var_1003_cast_fp16, y = var_1017_cast_fp16)[name = string("hidden_states_211_cast_fp16")]; tensor var_1019_shape_cast_fp16 = shape(x = hidden_states_211_cast_fp16)[name = string("op_1019_shape_cast_fp16")]; int32 gather_64 = const()[name = string("gather_64"), val = int32(1)]; int32 gather_65 = const()[name = string("gather_65"), val = int32(8)]; int32 gather_66_axis_0 = const()[name = string("gather_66_axis_0"), val = int32(0)]; int32 gather_66_batch_dims_0 = const()[name = string("gather_66_batch_dims_0"), val = int32(0)]; bool gather_66_validate_indices_0 = const()[name = string("gather_66_validate_indices_0"), val = bool(false)]; string var_1019_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1019_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_66_indices_0_to_uint16 = const()[name = string("gather_66_indices_0_to_uint16"), val = uint16(2)]; tensor var_1019_shape_cast_fp16_to_uint16 = cast(dtype = var_1019_shape_cast_fp16_to_uint16_dtype_0, x = var_1019_shape_cast_fp16)[name = string("cast_367")]; uint16 gather_66_cast_uint16 = gather(axis = gather_66_axis_0, batch_dims = gather_66_batch_dims_0, indices = gather_66_indices_0_to_uint16, validate_indices = gather_66_validate_indices_0, x = var_1019_shape_cast_fp16_to_uint16)[name = string("gather_66_cast_uint16")]; string gather_66_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_66_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_67 = const()[name = string("gather_67"), val = int32(128)]; tensor var_1026_axes_0 = const()[name = string("op_1026_axes_0"), val = tensor([2])]; tensor var_1026_cast_fp16 = expand_dims(axes = var_1026_axes_0, x = hidden_states_211_cast_fp16)[name = string("op_1026_cast_fp16")]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; int32 gather_66_cast_uint16_to_int32 = cast(dtype = gather_66_cast_uint16_to_int32_dtype_0, x = gather_66_cast_uint16)[name = string("cast_366")]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_64, gather_65, var_918, gather_66_cast_uint16_to_int32, gather_67))[name = string("concat_37")]; tensor shape_8_cast_fp16 = shape(x = var_1026_cast_fp16)[name = string("shape_8_cast_fp16")]; int32 equal_8_y_0 = const()[name = string("equal_8_y_0"), val = int32(-1)]; tensor equal_8 = equal(x = concat_37, y = equal_8_y_0)[name = string("equal_8")]; tensor select_8 = select(a = shape_8_cast_fp16, b = concat_37, cond = equal_8)[name = string("select_8")]; tensor real_div_8 = real_div(x = select_8, y = shape_8_cast_fp16)[name = string("real_div_8")]; tensor hidden_states_213_cast_fp16 = tile(reps = real_div_8, x = var_1026_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; tensor concat_38x = const()[name = string("concat_38x"), val = tensor([1, 16, -1, 128])]; tensor key_states_9_cast_fp16 = reshape(shape = concat_38x, x = hidden_states_213_cast_fp16)[name = string("key_states_9_cast_fp16")]; tensor hidden_states_215_cast_fp16 = transpose(perm = hidden_states_215_perm_0, x = var_983_cast_fp16)[name = string("transpose_93")]; tensor var_1036_shape_cast_fp16 = shape(x = hidden_states_215_cast_fp16)[name = string("op_1036_shape_cast_fp16")]; int32 gather_68 = const()[name = string("gather_68"), val = int32(1)]; int32 gather_69 = const()[name = string("gather_69"), val = int32(8)]; int32 gather_70_axis_0 = const()[name = string("gather_70_axis_0"), val = int32(0)]; int32 gather_70_batch_dims_0 = const()[name = string("gather_70_batch_dims_0"), val = int32(0)]; bool gather_70_validate_indices_0 = const()[name = string("gather_70_validate_indices_0"), val = bool(false)]; string var_1036_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1036_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_70_indices_0_to_uint16 = const()[name = string("gather_70_indices_0_to_uint16"), val = uint16(2)]; tensor var_1036_shape_cast_fp16_to_uint16 = cast(dtype = var_1036_shape_cast_fp16_to_uint16_dtype_0, x = var_1036_shape_cast_fp16)[name = string("cast_365")]; uint16 gather_70_cast_uint16 = gather(axis = gather_70_axis_0, batch_dims = gather_70_batch_dims_0, indices = gather_70_indices_0_to_uint16, validate_indices = gather_70_validate_indices_0, x = var_1036_shape_cast_fp16_to_uint16)[name = string("gather_70_cast_uint16")]; string gather_70_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_70_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_71 = const()[name = string("gather_71"), val = int32(128)]; tensor var_1043_axes_0 = const()[name = string("op_1043_axes_0"), val = tensor([2])]; tensor var_1043_cast_fp16 = expand_dims(axes = var_1043_axes_0, x = hidden_states_215_cast_fp16)[name = string("op_1043_cast_fp16")]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; int32 gather_70_cast_uint16_to_int32 = cast(dtype = gather_70_cast_uint16_to_int32_dtype_0, x = gather_70_cast_uint16)[name = string("cast_364")]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (gather_68, gather_69, var_918, gather_70_cast_uint16_to_int32, gather_71))[name = string("concat_39")]; tensor shape_9_cast_fp16 = shape(x = var_1043_cast_fp16)[name = string("shape_9_cast_fp16")]; int32 equal_9_y_0 = const()[name = string("equal_9_y_0"), val = int32(-1)]; tensor equal_9 = equal(x = concat_39, y = equal_9_y_0)[name = string("equal_9")]; tensor select_9 = select(a = shape_9_cast_fp16, b = concat_39, cond = equal_9)[name = string("select_9")]; tensor real_div_9 = real_div(x = select_9, y = shape_9_cast_fp16)[name = string("real_div_9")]; tensor hidden_states_217_cast_fp16 = tile(reps = real_div_9, x = var_1043_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, 16, -1, 128])]; tensor value_states_9_cast_fp16 = reshape(shape = concat_40x, x = hidden_states_217_cast_fp16)[name = string("value_states_9_cast_fp16")]; bool var_1054_transpose_x_1 = const()[name = string("op_1054_transpose_x_1"), val = bool(false)]; bool var_1054_transpose_y_1 = const()[name = string("op_1054_transpose_y_1"), val = bool(true)]; tensor var_1054_cast_fp16 = matmul(transpose_x = var_1054_transpose_x_1, transpose_y = var_1054_transpose_y_1, x = query_9_cast_fp16, y = key_states_9_cast_fp16)[name = string("op_1054_cast_fp16")]; fp16 var_1055_to_fp16 = const()[name = string("op_1055_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_17_cast_fp16 = mul(x = var_1054_cast_fp16, y = var_1055_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor input_51_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_51_cast_fp16")]; tensor var_1058_cast_fp16 = softmax(axis = var_919, x = input_51_cast_fp16)[name = string("op_1058_cast_fp16")]; bool attn_output_17_transpose_x_0 = const()[name = string("attn_output_17_transpose_x_0"), val = bool(false)]; bool attn_output_17_transpose_y_0 = const()[name = string("attn_output_17_transpose_y_0"), val = bool(false)]; tensor attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = var_1058_cast_fp16, y = value_states_9_cast_fp16)[name = string("attn_output_17_cast_fp16")]; tensor var_1062_perm_0 = const()[name = string("op_1062_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_41_axis_0 = const()[name = string("concat_41_axis_0"), val = int32(0)]; bool concat_41_interleave_0 = const()[name = string("concat_41_interleave_0"), val = bool(false)]; int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_368")]; tensor concat_41 = concat(axis = concat_41_axis_0, interleave = concat_41_interleave_0, values = (gather_58, gather_59_cast_uint16_to_int32, var_919))[name = string("concat_41")]; tensor var_1062_cast_fp16 = transpose(perm = var_1062_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_92")]; tensor var_1065_cast_fp16 = reshape(shape = concat_41, x = var_1062_cast_fp16)[name = string("op_1065_cast_fp16")]; tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444872832)))]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = var_1065_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor hidden_states_221_cast_fp16 = add(x = hidden_states_185_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; fp16 var_918_promoted_3_to_fp16 = const()[name = string("op_918_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1072_cast_fp16 = pow(x = hidden_states_221_cast_fp16, y = var_918_promoted_3_to_fp16)[name = string("op_1072_cast_fp16")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1072_cast_fp16)[name = string("variance_39_cast_fp16")]; fp16 var_1075_to_fp16 = const()[name = string("op_1075_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1076_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1075_to_fp16)[name = string("op_1076_cast_fp16")]; fp32 var_1077_epsilon_0 = const()[name = string("op_1077_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1077_cast_fp16 = rsqrt(epsilon = var_1077_epsilon_0, x = var_1076_cast_fp16)[name = string("op_1077_cast_fp16")]; tensor hidden_states_225_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = var_1077_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; tensor layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449067200)))]; tensor input_57_cast_fp16 = mul(x = layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_225_cast_fp16)[name = string("input_57_cast_fp16")]; tensor layers_4_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_4_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449069312)))]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_gate_proj_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1089_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1089_cast_fp16")]; tensor layers_4_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_4_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455360832)))]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_up_proj_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_61_cast_fp16 = mul(x = var_1089_cast_fp16, y = linear_33_cast_fp16)[name = string("input_61_cast_fp16")]; tensor layers_4_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_4_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461652352)))]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_mlp_down_proj_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor hidden_states_231_cast_fp16 = add(x = hidden_states_221_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_231_cast_fp16")]; int32 var_1106 = const()[name = string("op_1106"), val = int32(2)]; int32 var_1107 = const()[name = string("op_1107"), val = int32(-1)]; fp16 var_1106_promoted_to_fp16 = const()[name = string("op_1106_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1116_cast_fp16 = pow(x = hidden_states_231_cast_fp16, y = var_1106_promoted_to_fp16)[name = string("op_1116_cast_fp16")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1116_cast_fp16)[name = string("variance_41_cast_fp16")]; fp16 var_1119_to_fp16 = const()[name = string("op_1119_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1120_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1119_to_fp16)[name = string("op_1120_cast_fp16")]; fp32 var_1121_epsilon_0 = const()[name = string("op_1121_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1121_cast_fp16 = rsqrt(epsilon = var_1121_epsilon_0, x = var_1120_cast_fp16)[name = string("op_1121_cast_fp16")]; tensor hidden_states_235_cast_fp16 = mul(x = hidden_states_231_cast_fp16, y = var_1121_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; tensor layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467943872)))]; tensor hidden_states_239_cast_fp16 = mul(x = layers_5_input_layernorm_weight_to_fp16, y = hidden_states_235_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; tensor var_1134_shape_cast_fp16 = shape(x = hidden_states_239_cast_fp16)[name = string("op_1134_shape_cast_fp16")]; int32 gather_72 = const()[name = string("gather_72"), val = int32(1)]; int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; string var_1134_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1134_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_73_indices_0_to_uint16 = const()[name = string("gather_73_indices_0_to_uint16"), val = uint16(1)]; tensor var_1134_shape_cast_fp16_to_uint16 = cast(dtype = var_1134_shape_cast_fp16_to_uint16_dtype_0, x = var_1134_shape_cast_fp16)[name = string("cast_363")]; uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = gather_73_indices_0_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_1134_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467945984)))]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = hidden_states_239_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor concat_42x = const()[name = string("concat_42x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_241_cast_fp16 = reshape(shape = concat_42x, x = linear_35_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; fp16 var_1106_promoted_1_to_fp16 = const()[name = string("op_1106_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_1142_cast_fp16 = pow(x = hidden_states_241_cast_fp16, y = var_1106_promoted_1_to_fp16)[name = string("op_1142_cast_fp16")]; tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_1142_cast_fp16)[name = string("variance_43_cast_fp16")]; fp16 var_1145_to_fp16 = const()[name = string("op_1145_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1146_cast_fp16 = add(x = variance_43_cast_fp16, y = var_1145_to_fp16)[name = string("op_1146_cast_fp16")]; fp32 var_1147_epsilon_0 = const()[name = string("op_1147_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1147_cast_fp16 = rsqrt(epsilon = var_1147_epsilon_0, x = var_1146_cast_fp16)[name = string("op_1147_cast_fp16")]; tensor hidden_states_245_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = var_1147_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; tensor layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472140352)))]; tensor var_1150_cast_fp16 = mul(x = layers_5_self_attn_q_norm_weight_to_fp16, y = hidden_states_245_cast_fp16)[name = string("op_1150_cast_fp16")]; tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472140672)))]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = hidden_states_239_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor concat_43x = const()[name = string("concat_43x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_249_cast_fp16 = reshape(shape = concat_43x, x = linear_36_cast_fp16)[name = string("hidden_states_249_cast_fp16")]; fp16 var_1106_promoted_2_to_fp16 = const()[name = string("op_1106_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_1158_cast_fp16 = pow(x = hidden_states_249_cast_fp16, y = var_1106_promoted_2_to_fp16)[name = string("op_1158_cast_fp16")]; tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_1158_cast_fp16)[name = string("variance_45_cast_fp16")]; fp16 var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1162_cast_fp16 = add(x = variance_45_cast_fp16, y = var_1161_to_fp16)[name = string("op_1162_cast_fp16")]; fp32 var_1163_epsilon_0 = const()[name = string("op_1163_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1163_cast_fp16 = rsqrt(epsilon = var_1163_epsilon_0, x = var_1162_cast_fp16)[name = string("op_1163_cast_fp16")]; tensor hidden_states_253_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = var_1163_cast_fp16)[name = string("hidden_states_253_cast_fp16")]; tensor layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(474237888)))]; tensor var_1166_cast_fp16 = mul(x = layers_5_self_attn_k_norm_weight_to_fp16, y = hidden_states_253_cast_fp16)[name = string("op_1166_cast_fp16")]; tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(474238208)))]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = hidden_states_239_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 8, 128])]; tensor var_1171_cast_fp16 = reshape(shape = concat_44x, x = linear_37_cast_fp16)[name = string("op_1171_cast_fp16")]; tensor hidden_states_261_perm_0 = const()[name = string("hidden_states_261_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_1150_cast_fp16)[name = string("transpose_91")]; tensor var_1175_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1175_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1186_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1186_cast_fp16")]; bool var_1188_interleave_0 = const()[name = string("op_1188_interleave_0"), val = bool(false)]; tensor var_1188_cast_fp16 = concat(axis = var_1107, interleave = var_1188_interleave_0, values = (var_1186_cast_fp16, x1_21_cast_fp16))[name = string("op_1188_cast_fp16")]; tensor var_1189_cast_fp16 = mul(x = var_1188_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1189_cast_fp16")]; tensor query_11_cast_fp16 = add(x = var_1175_cast_fp16, y = var_1189_cast_fp16)[name = string("query_11_cast_fp16")]; tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_1166_cast_fp16)[name = string("transpose_90")]; tensor var_1191_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1191_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1202_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1202_cast_fp16")]; bool var_1204_interleave_0 = const()[name = string("op_1204_interleave_0"), val = bool(false)]; tensor var_1204_cast_fp16 = concat(axis = var_1107, interleave = var_1204_interleave_0, values = (var_1202_cast_fp16, x1_23_cast_fp16))[name = string("op_1204_cast_fp16")]; tensor var_1205_cast_fp16 = mul(x = var_1204_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1205_cast_fp16")]; tensor hidden_states_257_cast_fp16 = add(x = var_1191_cast_fp16, y = var_1205_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; tensor var_1207_shape_cast_fp16 = shape(x = hidden_states_257_cast_fp16)[name = string("op_1207_shape_cast_fp16")]; int32 gather_78 = const()[name = string("gather_78"), val = int32(1)]; int32 gather_79 = const()[name = string("gather_79"), val = int32(8)]; int32 gather_80_axis_0 = const()[name = string("gather_80_axis_0"), val = int32(0)]; int32 gather_80_batch_dims_0 = const()[name = string("gather_80_batch_dims_0"), val = int32(0)]; bool gather_80_validate_indices_0 = const()[name = string("gather_80_validate_indices_0"), val = bool(false)]; string var_1207_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1207_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_80_indices_0_to_uint16 = const()[name = string("gather_80_indices_0_to_uint16"), val = uint16(2)]; tensor var_1207_shape_cast_fp16_to_uint16 = cast(dtype = var_1207_shape_cast_fp16_to_uint16_dtype_0, x = var_1207_shape_cast_fp16)[name = string("cast_361")]; uint16 gather_80_cast_uint16 = gather(axis = gather_80_axis_0, batch_dims = gather_80_batch_dims_0, indices = gather_80_indices_0_to_uint16, validate_indices = gather_80_validate_indices_0, x = var_1207_shape_cast_fp16_to_uint16)[name = string("gather_80_cast_uint16")]; string gather_80_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_80_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_81 = const()[name = string("gather_81"), val = int32(128)]; tensor var_1214_axes_0 = const()[name = string("op_1214_axes_0"), val = tensor([2])]; tensor var_1214_cast_fp16 = expand_dims(axes = var_1214_axes_0, x = hidden_states_257_cast_fp16)[name = string("op_1214_cast_fp16")]; int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)]; bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)]; int32 gather_80_cast_uint16_to_int32 = cast(dtype = gather_80_cast_uint16_to_int32_dtype_0, x = gather_80_cast_uint16)[name = string("cast_360")]; tensor concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (gather_78, gather_79, var_1106, gather_80_cast_uint16_to_int32, gather_81))[name = string("concat_45")]; tensor shape_10_cast_fp16 = shape(x = var_1214_cast_fp16)[name = string("shape_10_cast_fp16")]; int32 equal_10_y_0 = const()[name = string("equal_10_y_0"), val = int32(-1)]; tensor equal_10 = equal(x = concat_45, y = equal_10_y_0)[name = string("equal_10")]; tensor select_10 = select(a = shape_10_cast_fp16, b = concat_45, cond = equal_10)[name = string("select_10")]; tensor real_div_10 = real_div(x = select_10, y = shape_10_cast_fp16)[name = string("real_div_10")]; tensor hidden_states_259_cast_fp16 = tile(reps = real_div_10, x = var_1214_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; tensor concat_46x = const()[name = string("concat_46x"), val = tensor([1, 16, -1, 128])]; tensor key_states_11_cast_fp16 = reshape(shape = concat_46x, x = hidden_states_259_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor hidden_states_261_cast_fp16 = transpose(perm = hidden_states_261_perm_0, x = var_1171_cast_fp16)[name = string("transpose_89")]; tensor var_1224_shape_cast_fp16 = shape(x = hidden_states_261_cast_fp16)[name = string("op_1224_shape_cast_fp16")]; int32 gather_82 = const()[name = string("gather_82"), val = int32(1)]; int32 gather_83 = const()[name = string("gather_83"), val = int32(8)]; int32 gather_84_axis_0 = const()[name = string("gather_84_axis_0"), val = int32(0)]; int32 gather_84_batch_dims_0 = const()[name = string("gather_84_batch_dims_0"), val = int32(0)]; bool gather_84_validate_indices_0 = const()[name = string("gather_84_validate_indices_0"), val = bool(false)]; string var_1224_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1224_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_84_indices_0_to_uint16 = const()[name = string("gather_84_indices_0_to_uint16"), val = uint16(2)]; tensor var_1224_shape_cast_fp16_to_uint16 = cast(dtype = var_1224_shape_cast_fp16_to_uint16_dtype_0, x = var_1224_shape_cast_fp16)[name = string("cast_359")]; uint16 gather_84_cast_uint16 = gather(axis = gather_84_axis_0, batch_dims = gather_84_batch_dims_0, indices = gather_84_indices_0_to_uint16, validate_indices = gather_84_validate_indices_0, x = var_1224_shape_cast_fp16_to_uint16)[name = string("gather_84_cast_uint16")]; string gather_84_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_84_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_85 = const()[name = string("gather_85"), val = int32(128)]; tensor var_1231_axes_0 = const()[name = string("op_1231_axes_0"), val = tensor([2])]; tensor var_1231_cast_fp16 = expand_dims(axes = var_1231_axes_0, x = hidden_states_261_cast_fp16)[name = string("op_1231_cast_fp16")]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; int32 gather_84_cast_uint16_to_int32 = cast(dtype = gather_84_cast_uint16_to_int32_dtype_0, x = gather_84_cast_uint16)[name = string("cast_358")]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (gather_82, gather_83, var_1106, gather_84_cast_uint16_to_int32, gather_85))[name = string("concat_47")]; tensor shape_11_cast_fp16 = shape(x = var_1231_cast_fp16)[name = string("shape_11_cast_fp16")]; int32 equal_11_y_0 = const()[name = string("equal_11_y_0"), val = int32(-1)]; tensor equal_11 = equal(x = concat_47, y = equal_11_y_0)[name = string("equal_11")]; tensor select_11 = select(a = shape_11_cast_fp16, b = concat_47, cond = equal_11)[name = string("select_11")]; tensor real_div_11 = real_div(x = select_11, y = shape_11_cast_fp16)[name = string("real_div_11")]; tensor hidden_states_263_cast_fp16 = tile(reps = real_div_11, x = var_1231_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; tensor concat_48x = const()[name = string("concat_48x"), val = tensor([1, 16, -1, 128])]; tensor value_states_11_cast_fp16 = reshape(shape = concat_48x, x = hidden_states_263_cast_fp16)[name = string("value_states_11_cast_fp16")]; bool var_1242_transpose_x_1 = const()[name = string("op_1242_transpose_x_1"), val = bool(false)]; bool var_1242_transpose_y_1 = const()[name = string("op_1242_transpose_y_1"), val = bool(true)]; tensor var_1242_cast_fp16 = matmul(transpose_x = var_1242_transpose_x_1, transpose_y = var_1242_transpose_y_1, x = query_11_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_1242_cast_fp16")]; fp16 var_1243_to_fp16 = const()[name = string("op_1243_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_21_cast_fp16 = mul(x = var_1242_cast_fp16, y = var_1243_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor input_63_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1246_cast_fp16 = softmax(axis = var_1107, x = input_63_cast_fp16)[name = string("op_1246_cast_fp16")]; bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = var_1246_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_1250_perm_0 = const()[name = string("op_1250_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_362")]; tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (gather_72, gather_73_cast_uint16_to_int32, var_1107))[name = string("concat_49")]; tensor var_1250_cast_fp16 = transpose(perm = var_1250_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_88")]; tensor var_1253_cast_fp16 = reshape(shape = concat_49, x = var_1250_cast_fp16)[name = string("op_1253_cast_fp16")]; tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476335424)))]; tensor linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = var_1253_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor hidden_states_267_cast_fp16 = add(x = hidden_states_231_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; fp16 var_1106_promoted_3_to_fp16 = const()[name = string("op_1106_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1260_cast_fp16 = pow(x = hidden_states_267_cast_fp16, y = var_1106_promoted_3_to_fp16)[name = string("op_1260_cast_fp16")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_1260_cast_fp16)[name = string("variance_47_cast_fp16")]; fp16 var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1264_cast_fp16 = add(x = variance_47_cast_fp16, y = var_1263_to_fp16)[name = string("op_1264_cast_fp16")]; fp32 var_1265_epsilon_0 = const()[name = string("op_1265_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1265_cast_fp16 = rsqrt(epsilon = var_1265_epsilon_0, x = var_1264_cast_fp16)[name = string("op_1265_cast_fp16")]; tensor hidden_states_271_cast_fp16 = mul(x = hidden_states_267_cast_fp16, y = var_1265_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; tensor layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480529792)))]; tensor input_69_cast_fp16 = mul(x = layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_271_cast_fp16)[name = string("input_69_cast_fp16")]; tensor layers_5_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_5_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480531904)))]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_gate_proj_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_1277_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1277_cast_fp16")]; tensor layers_5_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_5_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(486823424)))]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_up_proj_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_73_cast_fp16 = mul(x = var_1277_cast_fp16, y = linear_40_cast_fp16)[name = string("input_73_cast_fp16")]; tensor layers_5_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_5_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493114944)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_mlp_down_proj_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor hidden_states_277_cast_fp16 = add(x = hidden_states_267_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; int32 var_1294 = const()[name = string("op_1294"), val = int32(2)]; int32 var_1295 = const()[name = string("op_1295"), val = int32(-1)]; fp16 var_1294_promoted_to_fp16 = const()[name = string("op_1294_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1304_cast_fp16 = pow(x = hidden_states_277_cast_fp16, y = var_1294_promoted_to_fp16)[name = string("op_1304_cast_fp16")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_1304_cast_fp16)[name = string("variance_49_cast_fp16")]; fp16 var_1307_to_fp16 = const()[name = string("op_1307_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1308_cast_fp16 = add(x = variance_49_cast_fp16, y = var_1307_to_fp16)[name = string("op_1308_cast_fp16")]; fp32 var_1309_epsilon_0 = const()[name = string("op_1309_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1309_cast_fp16 = rsqrt(epsilon = var_1309_epsilon_0, x = var_1308_cast_fp16)[name = string("op_1309_cast_fp16")]; tensor hidden_states_281_cast_fp16 = mul(x = hidden_states_277_cast_fp16, y = var_1309_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; tensor layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499406464)))]; tensor hidden_states_285_cast_fp16 = mul(x = layers_6_input_layernorm_weight_to_fp16, y = hidden_states_281_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; tensor var_1322_shape_cast_fp16 = shape(x = hidden_states_285_cast_fp16)[name = string("op_1322_shape_cast_fp16")]; int32 gather_86 = const()[name = string("gather_86"), val = int32(1)]; int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; string var_1322_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1322_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_87_indices_0_to_uint16 = const()[name = string("gather_87_indices_0_to_uint16"), val = uint16(1)]; tensor var_1322_shape_cast_fp16_to_uint16 = cast(dtype = var_1322_shape_cast_fp16_to_uint16_dtype_0, x = var_1322_shape_cast_fp16)[name = string("cast_357")]; uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = gather_87_indices_0_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_1322_shape_cast_fp16_to_uint16)[name = string("gather_87_cast_uint16")]; string gather_87_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_87_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499408576)))]; tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = hidden_states_285_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor concat_50x = const()[name = string("concat_50x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_287_cast_fp16 = reshape(shape = concat_50x, x = linear_42_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; fp16 var_1294_promoted_1_to_fp16 = const()[name = string("op_1294_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_1330_cast_fp16 = pow(x = hidden_states_287_cast_fp16, y = var_1294_promoted_1_to_fp16)[name = string("op_1330_cast_fp16")]; tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_1330_cast_fp16)[name = string("variance_51_cast_fp16")]; fp16 var_1333_to_fp16 = const()[name = string("op_1333_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1334_cast_fp16 = add(x = variance_51_cast_fp16, y = var_1333_to_fp16)[name = string("op_1334_cast_fp16")]; fp32 var_1335_epsilon_0 = const()[name = string("op_1335_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1335_cast_fp16 = rsqrt(epsilon = var_1335_epsilon_0, x = var_1334_cast_fp16)[name = string("op_1335_cast_fp16")]; tensor hidden_states_291_cast_fp16 = mul(x = hidden_states_287_cast_fp16, y = var_1335_cast_fp16)[name = string("hidden_states_291_cast_fp16")]; tensor layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503602944)))]; tensor var_1338_cast_fp16 = mul(x = layers_6_self_attn_q_norm_weight_to_fp16, y = hidden_states_291_cast_fp16)[name = string("op_1338_cast_fp16")]; tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503603264)))]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = hidden_states_285_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor concat_51x = const()[name = string("concat_51x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_295_cast_fp16 = reshape(shape = concat_51x, x = linear_43_cast_fp16)[name = string("hidden_states_295_cast_fp16")]; fp16 var_1294_promoted_2_to_fp16 = const()[name = string("op_1294_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_1346_cast_fp16 = pow(x = hidden_states_295_cast_fp16, y = var_1294_promoted_2_to_fp16)[name = string("op_1346_cast_fp16")]; tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_1346_cast_fp16)[name = string("variance_53_cast_fp16")]; fp16 var_1349_to_fp16 = const()[name = string("op_1349_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1350_cast_fp16 = add(x = variance_53_cast_fp16, y = var_1349_to_fp16)[name = string("op_1350_cast_fp16")]; fp32 var_1351_epsilon_0 = const()[name = string("op_1351_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1351_cast_fp16 = rsqrt(epsilon = var_1351_epsilon_0, x = var_1350_cast_fp16)[name = string("op_1351_cast_fp16")]; tensor hidden_states_299_cast_fp16 = mul(x = hidden_states_295_cast_fp16, y = var_1351_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; tensor layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505700480)))]; tensor var_1354_cast_fp16 = mul(x = layers_6_self_attn_k_norm_weight_to_fp16, y = hidden_states_299_cast_fp16)[name = string("op_1354_cast_fp16")]; tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505700800)))]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = hidden_states_285_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_52x = const()[name = string("concat_52x"), val = tensor([1, -1, 8, 128])]; tensor var_1359_cast_fp16 = reshape(shape = concat_52x, x = linear_44_cast_fp16)[name = string("op_1359_cast_fp16")]; tensor hidden_states_307_perm_0 = const()[name = string("hidden_states_307_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1338_cast_fp16)[name = string("transpose_87")]; tensor var_1363_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1363_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1374_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1374_cast_fp16")]; bool var_1376_interleave_0 = const()[name = string("op_1376_interleave_0"), val = bool(false)]; tensor var_1376_cast_fp16 = concat(axis = var_1295, interleave = var_1376_interleave_0, values = (var_1374_cast_fp16, x1_25_cast_fp16))[name = string("op_1376_cast_fp16")]; tensor var_1377_cast_fp16 = mul(x = var_1376_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1377_cast_fp16")]; tensor query_13_cast_fp16 = add(x = var_1363_cast_fp16, y = var_1377_cast_fp16)[name = string("query_13_cast_fp16")]; tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1354_cast_fp16)[name = string("transpose_86")]; tensor var_1379_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1379_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1390_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1390_cast_fp16")]; bool var_1392_interleave_0 = const()[name = string("op_1392_interleave_0"), val = bool(false)]; tensor var_1392_cast_fp16 = concat(axis = var_1295, interleave = var_1392_interleave_0, values = (var_1390_cast_fp16, x1_27_cast_fp16))[name = string("op_1392_cast_fp16")]; tensor var_1393_cast_fp16 = mul(x = var_1392_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1393_cast_fp16")]; tensor hidden_states_303_cast_fp16 = add(x = var_1379_cast_fp16, y = var_1393_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; tensor var_1395_shape_cast_fp16 = shape(x = hidden_states_303_cast_fp16)[name = string("op_1395_shape_cast_fp16")]; int32 gather_92 = const()[name = string("gather_92"), val = int32(1)]; int32 gather_93 = const()[name = string("gather_93"), val = int32(8)]; int32 gather_94_axis_0 = const()[name = string("gather_94_axis_0"), val = int32(0)]; int32 gather_94_batch_dims_0 = const()[name = string("gather_94_batch_dims_0"), val = int32(0)]; bool gather_94_validate_indices_0 = const()[name = string("gather_94_validate_indices_0"), val = bool(false)]; string var_1395_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1395_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_94_indices_0_to_uint16 = const()[name = string("gather_94_indices_0_to_uint16"), val = uint16(2)]; tensor var_1395_shape_cast_fp16_to_uint16 = cast(dtype = var_1395_shape_cast_fp16_to_uint16_dtype_0, x = var_1395_shape_cast_fp16)[name = string("cast_355")]; uint16 gather_94_cast_uint16 = gather(axis = gather_94_axis_0, batch_dims = gather_94_batch_dims_0, indices = gather_94_indices_0_to_uint16, validate_indices = gather_94_validate_indices_0, x = var_1395_shape_cast_fp16_to_uint16)[name = string("gather_94_cast_uint16")]; string gather_94_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_94_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_95 = const()[name = string("gather_95"), val = int32(128)]; tensor var_1402_axes_0 = const()[name = string("op_1402_axes_0"), val = tensor([2])]; tensor var_1402_cast_fp16 = expand_dims(axes = var_1402_axes_0, x = hidden_states_303_cast_fp16)[name = string("op_1402_cast_fp16")]; int32 concat_53_axis_0 = const()[name = string("concat_53_axis_0"), val = int32(0)]; bool concat_53_interleave_0 = const()[name = string("concat_53_interleave_0"), val = bool(false)]; int32 gather_94_cast_uint16_to_int32 = cast(dtype = gather_94_cast_uint16_to_int32_dtype_0, x = gather_94_cast_uint16)[name = string("cast_354")]; tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_92, gather_93, var_1294, gather_94_cast_uint16_to_int32, gather_95))[name = string("concat_53")]; tensor shape_12_cast_fp16 = shape(x = var_1402_cast_fp16)[name = string("shape_12_cast_fp16")]; int32 equal_12_y_0 = const()[name = string("equal_12_y_0"), val = int32(-1)]; tensor equal_12 = equal(x = concat_53, y = equal_12_y_0)[name = string("equal_12")]; tensor select_12 = select(a = shape_12_cast_fp16, b = concat_53, cond = equal_12)[name = string("select_12")]; tensor real_div_12 = real_div(x = select_12, y = shape_12_cast_fp16)[name = string("real_div_12")]; tensor hidden_states_305_cast_fp16 = tile(reps = real_div_12, x = var_1402_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; tensor concat_54x = const()[name = string("concat_54x"), val = tensor([1, 16, -1, 128])]; tensor key_states_13_cast_fp16 = reshape(shape = concat_54x, x = hidden_states_305_cast_fp16)[name = string("key_states_13_cast_fp16")]; tensor hidden_states_307_cast_fp16 = transpose(perm = hidden_states_307_perm_0, x = var_1359_cast_fp16)[name = string("transpose_85")]; tensor var_1412_shape_cast_fp16 = shape(x = hidden_states_307_cast_fp16)[name = string("op_1412_shape_cast_fp16")]; int32 gather_96 = const()[name = string("gather_96"), val = int32(1)]; int32 gather_97 = const()[name = string("gather_97"), val = int32(8)]; int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; string var_1412_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1412_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_98_indices_0_to_uint16 = const()[name = string("gather_98_indices_0_to_uint16"), val = uint16(2)]; tensor var_1412_shape_cast_fp16_to_uint16 = cast(dtype = var_1412_shape_cast_fp16_to_uint16_dtype_0, x = var_1412_shape_cast_fp16)[name = string("cast_353")]; uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = gather_98_indices_0_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1412_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_99 = const()[name = string("gather_99"), val = int32(128)]; tensor var_1419_axes_0 = const()[name = string("op_1419_axes_0"), val = tensor([2])]; tensor var_1419_cast_fp16 = expand_dims(axes = var_1419_axes_0, x = hidden_states_307_cast_fp16)[name = string("op_1419_cast_fp16")]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_352")]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (gather_96, gather_97, var_1294, gather_98_cast_uint16_to_int32, gather_99))[name = string("concat_55")]; tensor shape_13_cast_fp16 = shape(x = var_1419_cast_fp16)[name = string("shape_13_cast_fp16")]; int32 equal_13_y_0 = const()[name = string("equal_13_y_0"), val = int32(-1)]; tensor equal_13 = equal(x = concat_55, y = equal_13_y_0)[name = string("equal_13")]; tensor select_13 = select(a = shape_13_cast_fp16, b = concat_55, cond = equal_13)[name = string("select_13")]; tensor real_div_13 = real_div(x = select_13, y = shape_13_cast_fp16)[name = string("real_div_13")]; tensor hidden_states_309_cast_fp16 = tile(reps = real_div_13, x = var_1419_cast_fp16)[name = string("hidden_states_309_cast_fp16")]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, 16, -1, 128])]; tensor value_states_13_cast_fp16 = reshape(shape = concat_56x, x = hidden_states_309_cast_fp16)[name = string("value_states_13_cast_fp16")]; bool var_1430_transpose_x_1 = const()[name = string("op_1430_transpose_x_1"), val = bool(false)]; bool var_1430_transpose_y_1 = const()[name = string("op_1430_transpose_y_1"), val = bool(true)]; tensor var_1430_cast_fp16 = matmul(transpose_x = var_1430_transpose_x_1, transpose_y = var_1430_transpose_y_1, x = query_13_cast_fp16, y = key_states_13_cast_fp16)[name = string("op_1430_cast_fp16")]; fp16 var_1431_to_fp16 = const()[name = string("op_1431_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_1430_cast_fp16, y = var_1431_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor input_75_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_75_cast_fp16")]; tensor var_1434_cast_fp16 = softmax(axis = var_1295, x = input_75_cast_fp16)[name = string("op_1434_cast_fp16")]; bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_1434_cast_fp16, y = value_states_13_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_1438_perm_0 = const()[name = string("op_1438_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; int32 gather_87_cast_uint16_to_int32 = cast(dtype = gather_87_cast_uint16_to_int32_dtype_0, x = gather_87_cast_uint16)[name = string("cast_356")]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (gather_86, gather_87_cast_uint16_to_int32, var_1295))[name = string("concat_57")]; tensor var_1438_cast_fp16 = transpose(perm = var_1438_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_84")]; tensor var_1441_cast_fp16 = reshape(shape = concat_57, x = var_1438_cast_fp16)[name = string("op_1441_cast_fp16")]; tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(507798016)))]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = var_1441_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor hidden_states_313_cast_fp16 = add(x = hidden_states_277_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_313_cast_fp16")]; fp16 var_1294_promoted_3_to_fp16 = const()[name = string("op_1294_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1448_cast_fp16 = pow(x = hidden_states_313_cast_fp16, y = var_1294_promoted_3_to_fp16)[name = string("op_1448_cast_fp16")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_1448_cast_fp16)[name = string("variance_55_cast_fp16")]; fp16 var_1451_to_fp16 = const()[name = string("op_1451_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1452_cast_fp16 = add(x = variance_55_cast_fp16, y = var_1451_to_fp16)[name = string("op_1452_cast_fp16")]; fp32 var_1453_epsilon_0 = const()[name = string("op_1453_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1453_cast_fp16 = rsqrt(epsilon = var_1453_epsilon_0, x = var_1452_cast_fp16)[name = string("op_1453_cast_fp16")]; tensor hidden_states_317_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = var_1453_cast_fp16)[name = string("hidden_states_317_cast_fp16")]; tensor layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511992384)))]; tensor input_81_cast_fp16 = mul(x = layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_317_cast_fp16)[name = string("input_81_cast_fp16")]; tensor layers_6_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_6_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511994496)))]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_gate_proj_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_1465_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1465_cast_fp16")]; tensor layers_6_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_6_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518286016)))]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_up_proj_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_85_cast_fp16 = mul(x = var_1465_cast_fp16, y = linear_47_cast_fp16)[name = string("input_85_cast_fp16")]; tensor layers_6_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_6_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524577536)))]; tensor linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_mlp_down_proj_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor hidden_states_323_cast_fp16 = add(x = hidden_states_313_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_323_cast_fp16")]; int32 var_1482 = const()[name = string("op_1482"), val = int32(2)]; int32 var_1483 = const()[name = string("op_1483"), val = int32(-1)]; fp16 var_1482_promoted_to_fp16 = const()[name = string("op_1482_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1492_cast_fp16 = pow(x = hidden_states_323_cast_fp16, y = var_1482_promoted_to_fp16)[name = string("op_1492_cast_fp16")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_1492_cast_fp16)[name = string("variance_57_cast_fp16")]; fp16 var_1495_to_fp16 = const()[name = string("op_1495_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1496_cast_fp16 = add(x = variance_57_cast_fp16, y = var_1495_to_fp16)[name = string("op_1496_cast_fp16")]; fp32 var_1497_epsilon_0 = const()[name = string("op_1497_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1497_cast_fp16 = rsqrt(epsilon = var_1497_epsilon_0, x = var_1496_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor hidden_states_327_cast_fp16 = mul(x = hidden_states_323_cast_fp16, y = var_1497_cast_fp16)[name = string("hidden_states_327_cast_fp16")]; tensor layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530869056)))]; tensor hidden_states_331_cast_fp16 = mul(x = layers_7_input_layernorm_weight_to_fp16, y = hidden_states_327_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; tensor var_1510_shape_cast_fp16 = shape(x = hidden_states_331_cast_fp16)[name = string("op_1510_shape_cast_fp16")]; int32 gather_100 = const()[name = string("gather_100"), val = int32(1)]; int32 gather_101_axis_0 = const()[name = string("gather_101_axis_0"), val = int32(0)]; int32 gather_101_batch_dims_0 = const()[name = string("gather_101_batch_dims_0"), val = int32(0)]; bool gather_101_validate_indices_0 = const()[name = string("gather_101_validate_indices_0"), val = bool(false)]; string var_1510_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1510_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_101_indices_0_to_uint16 = const()[name = string("gather_101_indices_0_to_uint16"), val = uint16(1)]; tensor var_1510_shape_cast_fp16_to_uint16 = cast(dtype = var_1510_shape_cast_fp16_to_uint16_dtype_0, x = var_1510_shape_cast_fp16)[name = string("cast_351")]; uint16 gather_101_cast_uint16 = gather(axis = gather_101_axis_0, batch_dims = gather_101_batch_dims_0, indices = gather_101_indices_0_to_uint16, validate_indices = gather_101_validate_indices_0, x = var_1510_shape_cast_fp16_to_uint16)[name = string("gather_101_cast_uint16")]; string gather_101_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_101_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530871168)))]; tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = hidden_states_331_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_333_cast_fp16 = reshape(shape = concat_58x, x = linear_49_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; fp16 var_1482_promoted_1_to_fp16 = const()[name = string("op_1482_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_1518_cast_fp16 = pow(x = hidden_states_333_cast_fp16, y = var_1482_promoted_1_to_fp16)[name = string("op_1518_cast_fp16")]; tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_1518_cast_fp16)[name = string("variance_59_cast_fp16")]; fp16 var_1521_to_fp16 = const()[name = string("op_1521_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1522_cast_fp16 = add(x = variance_59_cast_fp16, y = var_1521_to_fp16)[name = string("op_1522_cast_fp16")]; fp32 var_1523_epsilon_0 = const()[name = string("op_1523_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1523_cast_fp16 = rsqrt(epsilon = var_1523_epsilon_0, x = var_1522_cast_fp16)[name = string("op_1523_cast_fp16")]; tensor hidden_states_337_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = var_1523_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; tensor layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535065536)))]; tensor var_1526_cast_fp16 = mul(x = layers_7_self_attn_q_norm_weight_to_fp16, y = hidden_states_337_cast_fp16)[name = string("op_1526_cast_fp16")]; tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535065856)))]; tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = hidden_states_331_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_341_cast_fp16 = reshape(shape = concat_59x, x = linear_50_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; fp16 var_1482_promoted_2_to_fp16 = const()[name = string("op_1482_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_1534_cast_fp16 = pow(x = hidden_states_341_cast_fp16, y = var_1482_promoted_2_to_fp16)[name = string("op_1534_cast_fp16")]; tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_1534_cast_fp16)[name = string("variance_61_cast_fp16")]; fp16 var_1537_to_fp16 = const()[name = string("op_1537_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1538_cast_fp16 = add(x = variance_61_cast_fp16, y = var_1537_to_fp16)[name = string("op_1538_cast_fp16")]; fp32 var_1539_epsilon_0 = const()[name = string("op_1539_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1539_cast_fp16 = rsqrt(epsilon = var_1539_epsilon_0, x = var_1538_cast_fp16)[name = string("op_1539_cast_fp16")]; tensor hidden_states_345_cast_fp16 = mul(x = hidden_states_341_cast_fp16, y = var_1539_cast_fp16)[name = string("hidden_states_345_cast_fp16")]; tensor layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537163072)))]; tensor var_1542_cast_fp16 = mul(x = layers_7_self_attn_k_norm_weight_to_fp16, y = hidden_states_345_cast_fp16)[name = string("op_1542_cast_fp16")]; tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537163392)))]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = hidden_states_331_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor concat_60x = const()[name = string("concat_60x"), val = tensor([1, -1, 8, 128])]; tensor var_1547_cast_fp16 = reshape(shape = concat_60x, x = linear_51_cast_fp16)[name = string("op_1547_cast_fp16")]; tensor hidden_states_353_perm_0 = const()[name = string("hidden_states_353_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1526_cast_fp16)[name = string("transpose_83")]; tensor var_1551_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1551_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1562_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1562_cast_fp16")]; bool var_1564_interleave_0 = const()[name = string("op_1564_interleave_0"), val = bool(false)]; tensor var_1564_cast_fp16 = concat(axis = var_1483, interleave = var_1564_interleave_0, values = (var_1562_cast_fp16, x1_29_cast_fp16))[name = string("op_1564_cast_fp16")]; tensor var_1565_cast_fp16 = mul(x = var_1564_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1565_cast_fp16")]; tensor query_15_cast_fp16 = add(x = var_1551_cast_fp16, y = var_1565_cast_fp16)[name = string("query_15_cast_fp16")]; tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1542_cast_fp16)[name = string("transpose_82")]; tensor var_1567_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1567_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1578_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1578_cast_fp16")]; bool var_1580_interleave_0 = const()[name = string("op_1580_interleave_0"), val = bool(false)]; tensor var_1580_cast_fp16 = concat(axis = var_1483, interleave = var_1580_interleave_0, values = (var_1578_cast_fp16, x1_31_cast_fp16))[name = string("op_1580_cast_fp16")]; tensor var_1581_cast_fp16 = mul(x = var_1580_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1581_cast_fp16")]; tensor hidden_states_349_cast_fp16 = add(x = var_1567_cast_fp16, y = var_1581_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; tensor var_1583_shape_cast_fp16 = shape(x = hidden_states_349_cast_fp16)[name = string("op_1583_shape_cast_fp16")]; int32 gather_106 = const()[name = string("gather_106"), val = int32(1)]; int32 gather_107 = const()[name = string("gather_107"), val = int32(8)]; int32 gather_108_axis_0 = const()[name = string("gather_108_axis_0"), val = int32(0)]; int32 gather_108_batch_dims_0 = const()[name = string("gather_108_batch_dims_0"), val = int32(0)]; bool gather_108_validate_indices_0 = const()[name = string("gather_108_validate_indices_0"), val = bool(false)]; string var_1583_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1583_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_108_indices_0_to_uint16 = const()[name = string("gather_108_indices_0_to_uint16"), val = uint16(2)]; tensor var_1583_shape_cast_fp16_to_uint16 = cast(dtype = var_1583_shape_cast_fp16_to_uint16_dtype_0, x = var_1583_shape_cast_fp16)[name = string("cast_349")]; uint16 gather_108_cast_uint16 = gather(axis = gather_108_axis_0, batch_dims = gather_108_batch_dims_0, indices = gather_108_indices_0_to_uint16, validate_indices = gather_108_validate_indices_0, x = var_1583_shape_cast_fp16_to_uint16)[name = string("gather_108_cast_uint16")]; string gather_108_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_108_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_109 = const()[name = string("gather_109"), val = int32(128)]; tensor var_1590_axes_0 = const()[name = string("op_1590_axes_0"), val = tensor([2])]; tensor var_1590_cast_fp16 = expand_dims(axes = var_1590_axes_0, x = hidden_states_349_cast_fp16)[name = string("op_1590_cast_fp16")]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; int32 gather_108_cast_uint16_to_int32 = cast(dtype = gather_108_cast_uint16_to_int32_dtype_0, x = gather_108_cast_uint16)[name = string("cast_348")]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (gather_106, gather_107, var_1482, gather_108_cast_uint16_to_int32, gather_109))[name = string("concat_61")]; tensor shape_14_cast_fp16 = shape(x = var_1590_cast_fp16)[name = string("shape_14_cast_fp16")]; int32 equal_14_y_0 = const()[name = string("equal_14_y_0"), val = int32(-1)]; tensor equal_14 = equal(x = concat_61, y = equal_14_y_0)[name = string("equal_14")]; tensor select_14 = select(a = shape_14_cast_fp16, b = concat_61, cond = equal_14)[name = string("select_14")]; tensor real_div_14 = real_div(x = select_14, y = shape_14_cast_fp16)[name = string("real_div_14")]; tensor hidden_states_351_cast_fp16 = tile(reps = real_div_14, x = var_1590_cast_fp16)[name = string("hidden_states_351_cast_fp16")]; tensor concat_62x = const()[name = string("concat_62x"), val = tensor([1, 16, -1, 128])]; tensor key_states_15_cast_fp16 = reshape(shape = concat_62x, x = hidden_states_351_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor hidden_states_353_cast_fp16 = transpose(perm = hidden_states_353_perm_0, x = var_1547_cast_fp16)[name = string("transpose_81")]; tensor var_1600_shape_cast_fp16 = shape(x = hidden_states_353_cast_fp16)[name = string("op_1600_shape_cast_fp16")]; int32 gather_110 = const()[name = string("gather_110"), val = int32(1)]; int32 gather_111 = const()[name = string("gather_111"), val = int32(8)]; int32 gather_112_axis_0 = const()[name = string("gather_112_axis_0"), val = int32(0)]; int32 gather_112_batch_dims_0 = const()[name = string("gather_112_batch_dims_0"), val = int32(0)]; bool gather_112_validate_indices_0 = const()[name = string("gather_112_validate_indices_0"), val = bool(false)]; string var_1600_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1600_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_112_indices_0_to_uint16 = const()[name = string("gather_112_indices_0_to_uint16"), val = uint16(2)]; tensor var_1600_shape_cast_fp16_to_uint16 = cast(dtype = var_1600_shape_cast_fp16_to_uint16_dtype_0, x = var_1600_shape_cast_fp16)[name = string("cast_347")]; uint16 gather_112_cast_uint16 = gather(axis = gather_112_axis_0, batch_dims = gather_112_batch_dims_0, indices = gather_112_indices_0_to_uint16, validate_indices = gather_112_validate_indices_0, x = var_1600_shape_cast_fp16_to_uint16)[name = string("gather_112_cast_uint16")]; string gather_112_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_112_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_113 = const()[name = string("gather_113"), val = int32(128)]; tensor var_1607_axes_0 = const()[name = string("op_1607_axes_0"), val = tensor([2])]; tensor var_1607_cast_fp16 = expand_dims(axes = var_1607_axes_0, x = hidden_states_353_cast_fp16)[name = string("op_1607_cast_fp16")]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; int32 gather_112_cast_uint16_to_int32 = cast(dtype = gather_112_cast_uint16_to_int32_dtype_0, x = gather_112_cast_uint16)[name = string("cast_346")]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (gather_110, gather_111, var_1482, gather_112_cast_uint16_to_int32, gather_113))[name = string("concat_63")]; tensor shape_15_cast_fp16 = shape(x = var_1607_cast_fp16)[name = string("shape_15_cast_fp16")]; int32 equal_15_y_0 = const()[name = string("equal_15_y_0"), val = int32(-1)]; tensor equal_15 = equal(x = concat_63, y = equal_15_y_0)[name = string("equal_15")]; tensor select_15 = select(a = shape_15_cast_fp16, b = concat_63, cond = equal_15)[name = string("select_15")]; tensor real_div_15 = real_div(x = select_15, y = shape_15_cast_fp16)[name = string("real_div_15")]; tensor hidden_states_355_cast_fp16 = tile(reps = real_div_15, x = var_1607_cast_fp16)[name = string("hidden_states_355_cast_fp16")]; tensor concat_64x = const()[name = string("concat_64x"), val = tensor([1, 16, -1, 128])]; tensor value_states_15_cast_fp16 = reshape(shape = concat_64x, x = hidden_states_355_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_1618_transpose_x_1 = const()[name = string("op_1618_transpose_x_1"), val = bool(false)]; bool var_1618_transpose_y_1 = const()[name = string("op_1618_transpose_y_1"), val = bool(true)]; tensor var_1618_cast_fp16 = matmul(transpose_x = var_1618_transpose_x_1, transpose_y = var_1618_transpose_y_1, x = query_15_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_1618_cast_fp16")]; fp16 var_1619_to_fp16 = const()[name = string("op_1619_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_29_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor input_87_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_87_cast_fp16")]; tensor var_1622_cast_fp16 = softmax(axis = var_1483, x = input_87_cast_fp16)[name = string("op_1622_cast_fp16")]; bool attn_output_29_transpose_x_0 = const()[name = string("attn_output_29_transpose_x_0"), val = bool(false)]; bool attn_output_29_transpose_y_0 = const()[name = string("attn_output_29_transpose_y_0"), val = bool(false)]; tensor attn_output_29_cast_fp16 = matmul(transpose_x = attn_output_29_transpose_x_0, transpose_y = attn_output_29_transpose_y_0, x = var_1622_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_29_cast_fp16")]; tensor var_1626_perm_0 = const()[name = string("op_1626_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_65_axis_0 = const()[name = string("concat_65_axis_0"), val = int32(0)]; bool concat_65_interleave_0 = const()[name = string("concat_65_interleave_0"), val = bool(false)]; int32 gather_101_cast_uint16_to_int32 = cast(dtype = gather_101_cast_uint16_to_int32_dtype_0, x = gather_101_cast_uint16)[name = string("cast_350")]; tensor concat_65 = concat(axis = concat_65_axis_0, interleave = concat_65_interleave_0, values = (gather_100, gather_101_cast_uint16_to_int32, var_1483))[name = string("concat_65")]; tensor var_1626_cast_fp16 = transpose(perm = var_1626_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_80")]; tensor var_1629_cast_fp16 = reshape(shape = concat_65, x = var_1626_cast_fp16)[name = string("op_1629_cast_fp16")]; tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539260608)))]; tensor linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = var_1629_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor hidden_states_359_cast_fp16 = add(x = hidden_states_323_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; fp16 var_1482_promoted_3_to_fp16 = const()[name = string("op_1482_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1636_cast_fp16 = pow(x = hidden_states_359_cast_fp16, y = var_1482_promoted_3_to_fp16)[name = string("op_1636_cast_fp16")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_1636_cast_fp16)[name = string("variance_63_cast_fp16")]; fp16 var_1639_to_fp16 = const()[name = string("op_1639_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1640_cast_fp16 = add(x = variance_63_cast_fp16, y = var_1639_to_fp16)[name = string("op_1640_cast_fp16")]; fp32 var_1641_epsilon_0 = const()[name = string("op_1641_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1641_cast_fp16 = rsqrt(epsilon = var_1641_epsilon_0, x = var_1640_cast_fp16)[name = string("op_1641_cast_fp16")]; tensor hidden_states_363_cast_fp16 = mul(x = hidden_states_359_cast_fp16, y = var_1641_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; tensor layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543454976)))]; tensor input_93_cast_fp16 = mul(x = layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_363_cast_fp16)[name = string("input_93_cast_fp16")]; tensor layers_7_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_7_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543457088)))]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_gate_proj_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_1653_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1653_cast_fp16")]; tensor layers_7_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_7_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549748608)))]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_up_proj_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_97_cast_fp16 = mul(x = var_1653_cast_fp16, y = linear_54_cast_fp16)[name = string("input_97_cast_fp16")]; tensor layers_7_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_7_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556040128)))]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_mlp_down_proj_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor hidden_states_369_cast_fp16 = add(x = hidden_states_359_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_369_cast_fp16")]; int32 var_1670 = const()[name = string("op_1670"), val = int32(2)]; int32 var_1671 = const()[name = string("op_1671"), val = int32(-1)]; fp16 var_1670_promoted_to_fp16 = const()[name = string("op_1670_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1680_cast_fp16 = pow(x = hidden_states_369_cast_fp16, y = var_1670_promoted_to_fp16)[name = string("op_1680_cast_fp16")]; tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_1680_cast_fp16)[name = string("variance_65_cast_fp16")]; fp16 var_1683_to_fp16 = const()[name = string("op_1683_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1684_cast_fp16 = add(x = variance_65_cast_fp16, y = var_1683_to_fp16)[name = string("op_1684_cast_fp16")]; fp32 var_1685_epsilon_0 = const()[name = string("op_1685_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1685_cast_fp16 = rsqrt(epsilon = var_1685_epsilon_0, x = var_1684_cast_fp16)[name = string("op_1685_cast_fp16")]; tensor hidden_states_373_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = var_1685_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; tensor layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562331648)))]; tensor hidden_states_377_cast_fp16 = mul(x = layers_8_input_layernorm_weight_to_fp16, y = hidden_states_373_cast_fp16)[name = string("hidden_states_377_cast_fp16")]; tensor var_1698_shape_cast_fp16 = shape(x = hidden_states_377_cast_fp16)[name = string("op_1698_shape_cast_fp16")]; int32 gather_114 = const()[name = string("gather_114"), val = int32(1)]; int32 gather_115_axis_0 = const()[name = string("gather_115_axis_0"), val = int32(0)]; int32 gather_115_batch_dims_0 = const()[name = string("gather_115_batch_dims_0"), val = int32(0)]; bool gather_115_validate_indices_0 = const()[name = string("gather_115_validate_indices_0"), val = bool(false)]; string var_1698_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1698_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_115_indices_0_to_uint16 = const()[name = string("gather_115_indices_0_to_uint16"), val = uint16(1)]; tensor var_1698_shape_cast_fp16_to_uint16 = cast(dtype = var_1698_shape_cast_fp16_to_uint16_dtype_0, x = var_1698_shape_cast_fp16)[name = string("cast_345")]; uint16 gather_115_cast_uint16 = gather(axis = gather_115_axis_0, batch_dims = gather_115_batch_dims_0, indices = gather_115_indices_0_to_uint16, validate_indices = gather_115_validate_indices_0, x = var_1698_shape_cast_fp16_to_uint16)[name = string("gather_115_cast_uint16")]; string gather_115_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_115_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562333760)))]; tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = hidden_states_377_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_379_cast_fp16 = reshape(shape = concat_66x, x = linear_56_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; fp16 var_1670_promoted_1_to_fp16 = const()[name = string("op_1670_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_1706_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_1670_promoted_1_to_fp16)[name = string("op_1706_cast_fp16")]; tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_1706_cast_fp16)[name = string("variance_67_cast_fp16")]; fp16 var_1709_to_fp16 = const()[name = string("op_1709_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1710_cast_fp16 = add(x = variance_67_cast_fp16, y = var_1709_to_fp16)[name = string("op_1710_cast_fp16")]; fp32 var_1711_epsilon_0 = const()[name = string("op_1711_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1711_cast_fp16 = rsqrt(epsilon = var_1711_epsilon_0, x = var_1710_cast_fp16)[name = string("op_1711_cast_fp16")]; tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_1711_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; tensor layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566528128)))]; tensor var_1714_cast_fp16 = mul(x = layers_8_self_attn_q_norm_weight_to_fp16, y = hidden_states_383_cast_fp16)[name = string("op_1714_cast_fp16")]; tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566528448)))]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = hidden_states_377_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_387_cast_fp16 = reshape(shape = concat_67x, x = linear_57_cast_fp16)[name = string("hidden_states_387_cast_fp16")]; fp16 var_1670_promoted_2_to_fp16 = const()[name = string("op_1670_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_1722_cast_fp16 = pow(x = hidden_states_387_cast_fp16, y = var_1670_promoted_2_to_fp16)[name = string("op_1722_cast_fp16")]; tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_1722_cast_fp16)[name = string("variance_69_cast_fp16")]; fp16 var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1726_cast_fp16 = add(x = variance_69_cast_fp16, y = var_1725_to_fp16)[name = string("op_1726_cast_fp16")]; fp32 var_1727_epsilon_0 = const()[name = string("op_1727_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1727_cast_fp16 = rsqrt(epsilon = var_1727_epsilon_0, x = var_1726_cast_fp16)[name = string("op_1727_cast_fp16")]; tensor hidden_states_391_cast_fp16 = mul(x = hidden_states_387_cast_fp16, y = var_1727_cast_fp16)[name = string("hidden_states_391_cast_fp16")]; tensor layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568625664)))]; tensor var_1730_cast_fp16 = mul(x = layers_8_self_attn_k_norm_weight_to_fp16, y = hidden_states_391_cast_fp16)[name = string("op_1730_cast_fp16")]; tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568625984)))]; tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = hidden_states_377_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor concat_68x = const()[name = string("concat_68x"), val = tensor([1, -1, 8, 128])]; tensor var_1735_cast_fp16 = reshape(shape = concat_68x, x = linear_58_cast_fp16)[name = string("op_1735_cast_fp16")]; tensor hidden_states_399_perm_0 = const()[name = string("hidden_states_399_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1714_cast_fp16)[name = string("transpose_79")]; tensor var_1739_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1739_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1750_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1750_cast_fp16")]; bool var_1752_interleave_0 = const()[name = string("op_1752_interleave_0"), val = bool(false)]; tensor var_1752_cast_fp16 = concat(axis = var_1671, interleave = var_1752_interleave_0, values = (var_1750_cast_fp16, x1_33_cast_fp16))[name = string("op_1752_cast_fp16")]; tensor var_1753_cast_fp16 = mul(x = var_1752_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1753_cast_fp16")]; tensor query_17_cast_fp16 = add(x = var_1739_cast_fp16, y = var_1753_cast_fp16)[name = string("query_17_cast_fp16")]; tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1730_cast_fp16)[name = string("transpose_78")]; tensor var_1755_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1755_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1766_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1766_cast_fp16")]; bool var_1768_interleave_0 = const()[name = string("op_1768_interleave_0"), val = bool(false)]; tensor var_1768_cast_fp16 = concat(axis = var_1671, interleave = var_1768_interleave_0, values = (var_1766_cast_fp16, x1_35_cast_fp16))[name = string("op_1768_cast_fp16")]; tensor var_1769_cast_fp16 = mul(x = var_1768_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1769_cast_fp16")]; tensor hidden_states_395_cast_fp16 = add(x = var_1755_cast_fp16, y = var_1769_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; tensor var_1771_shape_cast_fp16 = shape(x = hidden_states_395_cast_fp16)[name = string("op_1771_shape_cast_fp16")]; int32 gather_120 = const()[name = string("gather_120"), val = int32(1)]; int32 gather_121 = const()[name = string("gather_121"), val = int32(8)]; int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; string var_1771_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1771_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_122_indices_0_to_uint16 = const()[name = string("gather_122_indices_0_to_uint16"), val = uint16(2)]; tensor var_1771_shape_cast_fp16_to_uint16 = cast(dtype = var_1771_shape_cast_fp16_to_uint16_dtype_0, x = var_1771_shape_cast_fp16)[name = string("cast_343")]; uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = gather_122_indices_0_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_1771_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_123 = const()[name = string("gather_123"), val = int32(128)]; tensor var_1778_axes_0 = const()[name = string("op_1778_axes_0"), val = tensor([2])]; tensor var_1778_cast_fp16 = expand_dims(axes = var_1778_axes_0, x = hidden_states_395_cast_fp16)[name = string("op_1778_cast_fp16")]; int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)]; bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)]; int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_342")]; tensor concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (gather_120, gather_121, var_1670, gather_122_cast_uint16_to_int32, gather_123))[name = string("concat_69")]; tensor shape_16_cast_fp16 = shape(x = var_1778_cast_fp16)[name = string("shape_16_cast_fp16")]; int32 equal_16_y_0 = const()[name = string("equal_16_y_0"), val = int32(-1)]; tensor equal_16 = equal(x = concat_69, y = equal_16_y_0)[name = string("equal_16")]; tensor select_16 = select(a = shape_16_cast_fp16, b = concat_69, cond = equal_16)[name = string("select_16")]; tensor real_div_16 = real_div(x = select_16, y = shape_16_cast_fp16)[name = string("real_div_16")]; tensor hidden_states_397_cast_fp16 = tile(reps = real_div_16, x = var_1778_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; tensor concat_70x = const()[name = string("concat_70x"), val = tensor([1, 16, -1, 128])]; tensor key_states_17_cast_fp16 = reshape(shape = concat_70x, x = hidden_states_397_cast_fp16)[name = string("key_states_17_cast_fp16")]; tensor hidden_states_399_cast_fp16 = transpose(perm = hidden_states_399_perm_0, x = var_1735_cast_fp16)[name = string("transpose_77")]; tensor var_1788_shape_cast_fp16 = shape(x = hidden_states_399_cast_fp16)[name = string("op_1788_shape_cast_fp16")]; int32 gather_124 = const()[name = string("gather_124"), val = int32(1)]; int32 gather_125 = const()[name = string("gather_125"), val = int32(8)]; int32 gather_126_axis_0 = const()[name = string("gather_126_axis_0"), val = int32(0)]; int32 gather_126_batch_dims_0 = const()[name = string("gather_126_batch_dims_0"), val = int32(0)]; bool gather_126_validate_indices_0 = const()[name = string("gather_126_validate_indices_0"), val = bool(false)]; string var_1788_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1788_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_126_indices_0_to_uint16 = const()[name = string("gather_126_indices_0_to_uint16"), val = uint16(2)]; tensor var_1788_shape_cast_fp16_to_uint16 = cast(dtype = var_1788_shape_cast_fp16_to_uint16_dtype_0, x = var_1788_shape_cast_fp16)[name = string("cast_341")]; uint16 gather_126_cast_uint16 = gather(axis = gather_126_axis_0, batch_dims = gather_126_batch_dims_0, indices = gather_126_indices_0_to_uint16, validate_indices = gather_126_validate_indices_0, x = var_1788_shape_cast_fp16_to_uint16)[name = string("gather_126_cast_uint16")]; string gather_126_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_126_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_127 = const()[name = string("gather_127"), val = int32(128)]; tensor var_1795_axes_0 = const()[name = string("op_1795_axes_0"), val = tensor([2])]; tensor var_1795_cast_fp16 = expand_dims(axes = var_1795_axes_0, x = hidden_states_399_cast_fp16)[name = string("op_1795_cast_fp16")]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; int32 gather_126_cast_uint16_to_int32 = cast(dtype = gather_126_cast_uint16_to_int32_dtype_0, x = gather_126_cast_uint16)[name = string("cast_340")]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (gather_124, gather_125, var_1670, gather_126_cast_uint16_to_int32, gather_127))[name = string("concat_71")]; tensor shape_17_cast_fp16 = shape(x = var_1795_cast_fp16)[name = string("shape_17_cast_fp16")]; int32 equal_17_y_0 = const()[name = string("equal_17_y_0"), val = int32(-1)]; tensor equal_17 = equal(x = concat_71, y = equal_17_y_0)[name = string("equal_17")]; tensor select_17 = select(a = shape_17_cast_fp16, b = concat_71, cond = equal_17)[name = string("select_17")]; tensor real_div_17 = real_div(x = select_17, y = shape_17_cast_fp16)[name = string("real_div_17")]; tensor hidden_states_401_cast_fp16 = tile(reps = real_div_17, x = var_1795_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; tensor concat_72x = const()[name = string("concat_72x"), val = tensor([1, 16, -1, 128])]; tensor value_states_17_cast_fp16 = reshape(shape = concat_72x, x = hidden_states_401_cast_fp16)[name = string("value_states_17_cast_fp16")]; bool var_1806_transpose_x_1 = const()[name = string("op_1806_transpose_x_1"), val = bool(false)]; bool var_1806_transpose_y_1 = const()[name = string("op_1806_transpose_y_1"), val = bool(true)]; tensor var_1806_cast_fp16 = matmul(transpose_x = var_1806_transpose_x_1, transpose_y = var_1806_transpose_y_1, x = query_17_cast_fp16, y = key_states_17_cast_fp16)[name = string("op_1806_cast_fp16")]; fp16 var_1807_to_fp16 = const()[name = string("op_1807_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_33_cast_fp16 = mul(x = var_1806_cast_fp16, y = var_1807_to_fp16)[name = string("attn_weights_33_cast_fp16")]; tensor input_99_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1810_cast_fp16 = softmax(axis = var_1671, x = input_99_cast_fp16)[name = string("op_1810_cast_fp16")]; bool attn_output_33_transpose_x_0 = const()[name = string("attn_output_33_transpose_x_0"), val = bool(false)]; bool attn_output_33_transpose_y_0 = const()[name = string("attn_output_33_transpose_y_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = var_1810_cast_fp16, y = value_states_17_cast_fp16)[name = string("attn_output_33_cast_fp16")]; tensor var_1814_perm_0 = const()[name = string("op_1814_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_73_axis_0 = const()[name = string("concat_73_axis_0"), val = int32(0)]; bool concat_73_interleave_0 = const()[name = string("concat_73_interleave_0"), val = bool(false)]; int32 gather_115_cast_uint16_to_int32 = cast(dtype = gather_115_cast_uint16_to_int32_dtype_0, x = gather_115_cast_uint16)[name = string("cast_344")]; tensor concat_73 = concat(axis = concat_73_axis_0, interleave = concat_73_interleave_0, values = (gather_114, gather_115_cast_uint16_to_int32, var_1671))[name = string("concat_73")]; tensor var_1814_cast_fp16 = transpose(perm = var_1814_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_76")]; tensor var_1817_cast_fp16 = reshape(shape = concat_73, x = var_1814_cast_fp16)[name = string("op_1817_cast_fp16")]; tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570723200)))]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = var_1817_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor hidden_states_405_cast_fp16 = add(x = hidden_states_369_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; fp16 var_1670_promoted_3_to_fp16 = const()[name = string("op_1670_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1824_cast_fp16 = pow(x = hidden_states_405_cast_fp16, y = var_1670_promoted_3_to_fp16)[name = string("op_1824_cast_fp16")]; tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_1824_cast_fp16)[name = string("variance_71_cast_fp16")]; fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1828_cast_fp16 = add(x = variance_71_cast_fp16, y = var_1827_to_fp16)[name = string("op_1828_cast_fp16")]; fp32 var_1829_epsilon_0 = const()[name = string("op_1829_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1829_cast_fp16 = rsqrt(epsilon = var_1829_epsilon_0, x = var_1828_cast_fp16)[name = string("op_1829_cast_fp16")]; tensor hidden_states_409_cast_fp16 = mul(x = hidden_states_405_cast_fp16, y = var_1829_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; tensor layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574917568)))]; tensor input_105_cast_fp16 = mul(x = layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_409_cast_fp16)[name = string("input_105_cast_fp16")]; tensor layers_8_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_8_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574919680)))]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_gate_proj_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_1841_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1841_cast_fp16")]; tensor layers_8_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_8_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581211200)))]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_up_proj_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_109_cast_fp16 = mul(x = var_1841_cast_fp16, y = linear_61_cast_fp16)[name = string("input_109_cast_fp16")]; tensor layers_8_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_8_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587502720)))]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_mlp_down_proj_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor hidden_states_415_cast_fp16 = add(x = hidden_states_405_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; int32 var_1858 = const()[name = string("op_1858"), val = int32(2)]; int32 var_1859 = const()[name = string("op_1859"), val = int32(-1)]; fp16 var_1858_promoted_to_fp16 = const()[name = string("op_1858_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1868_cast_fp16 = pow(x = hidden_states_415_cast_fp16, y = var_1858_promoted_to_fp16)[name = string("op_1868_cast_fp16")]; tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_1868_cast_fp16)[name = string("variance_73_cast_fp16")]; fp16 var_1871_to_fp16 = const()[name = string("op_1871_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1872_cast_fp16 = add(x = variance_73_cast_fp16, y = var_1871_to_fp16)[name = string("op_1872_cast_fp16")]; fp32 var_1873_epsilon_0 = const()[name = string("op_1873_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1873_cast_fp16 = rsqrt(epsilon = var_1873_epsilon_0, x = var_1872_cast_fp16)[name = string("op_1873_cast_fp16")]; tensor hidden_states_419_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = var_1873_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; tensor layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593794240)))]; tensor hidden_states_423_cast_fp16 = mul(x = layers_9_input_layernorm_weight_to_fp16, y = hidden_states_419_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; tensor var_1886_shape_cast_fp16 = shape(x = hidden_states_423_cast_fp16)[name = string("op_1886_shape_cast_fp16")]; int32 gather_128 = const()[name = string("gather_128"), val = int32(1)]; int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; string var_1886_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1886_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_129_indices_0_to_uint16 = const()[name = string("gather_129_indices_0_to_uint16"), val = uint16(1)]; tensor var_1886_shape_cast_fp16_to_uint16 = cast(dtype = var_1886_shape_cast_fp16_to_uint16_dtype_0, x = var_1886_shape_cast_fp16)[name = string("cast_339")]; uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = gather_129_indices_0_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1886_shape_cast_fp16_to_uint16)[name = string("gather_129_cast_uint16")]; string gather_129_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_129_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593796352)))]; tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = hidden_states_423_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor concat_74x = const()[name = string("concat_74x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_425_cast_fp16 = reshape(shape = concat_74x, x = linear_63_cast_fp16)[name = string("hidden_states_425_cast_fp16")]; fp16 var_1858_promoted_1_to_fp16 = const()[name = string("op_1858_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_1894_cast_fp16 = pow(x = hidden_states_425_cast_fp16, y = var_1858_promoted_1_to_fp16)[name = string("op_1894_cast_fp16")]; tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_1894_cast_fp16)[name = string("variance_75_cast_fp16")]; fp16 var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1898_cast_fp16 = add(x = variance_75_cast_fp16, y = var_1897_to_fp16)[name = string("op_1898_cast_fp16")]; fp32 var_1899_epsilon_0 = const()[name = string("op_1899_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1899_cast_fp16 = rsqrt(epsilon = var_1899_epsilon_0, x = var_1898_cast_fp16)[name = string("op_1899_cast_fp16")]; tensor hidden_states_429_cast_fp16 = mul(x = hidden_states_425_cast_fp16, y = var_1899_cast_fp16)[name = string("hidden_states_429_cast_fp16")]; tensor layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597990720)))]; tensor var_1902_cast_fp16 = mul(x = layers_9_self_attn_q_norm_weight_to_fp16, y = hidden_states_429_cast_fp16)[name = string("op_1902_cast_fp16")]; tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597991040)))]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = hidden_states_423_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor concat_75x = const()[name = string("concat_75x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_433_cast_fp16 = reshape(shape = concat_75x, x = linear_64_cast_fp16)[name = string("hidden_states_433_cast_fp16")]; fp16 var_1858_promoted_2_to_fp16 = const()[name = string("op_1858_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_1910_cast_fp16 = pow(x = hidden_states_433_cast_fp16, y = var_1858_promoted_2_to_fp16)[name = string("op_1910_cast_fp16")]; tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_1910_cast_fp16)[name = string("variance_77_cast_fp16")]; fp16 var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1914_cast_fp16 = add(x = variance_77_cast_fp16, y = var_1913_to_fp16)[name = string("op_1914_cast_fp16")]; fp32 var_1915_epsilon_0 = const()[name = string("op_1915_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1915_cast_fp16 = rsqrt(epsilon = var_1915_epsilon_0, x = var_1914_cast_fp16)[name = string("op_1915_cast_fp16")]; tensor hidden_states_437_cast_fp16 = mul(x = hidden_states_433_cast_fp16, y = var_1915_cast_fp16)[name = string("hidden_states_437_cast_fp16")]; tensor layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600088256)))]; tensor var_1918_cast_fp16 = mul(x = layers_9_self_attn_k_norm_weight_to_fp16, y = hidden_states_437_cast_fp16)[name = string("op_1918_cast_fp16")]; tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600088576)))]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = hidden_states_423_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 8, 128])]; tensor var_1923_cast_fp16 = reshape(shape = concat_76x, x = linear_65_cast_fp16)[name = string("op_1923_cast_fp16")]; tensor hidden_states_445_perm_0 = const()[name = string("hidden_states_445_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_1902_cast_fp16)[name = string("transpose_75")]; tensor var_1927_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1927_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1938_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_1938_cast_fp16")]; bool var_1940_interleave_0 = const()[name = string("op_1940_interleave_0"), val = bool(false)]; tensor var_1940_cast_fp16 = concat(axis = var_1859, interleave = var_1940_interleave_0, values = (var_1938_cast_fp16, x1_37_cast_fp16))[name = string("op_1940_cast_fp16")]; tensor var_1941_cast_fp16 = mul(x = var_1940_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1941_cast_fp16")]; tensor query_19_cast_fp16 = add(x = var_1927_cast_fp16, y = var_1941_cast_fp16)[name = string("query_19_cast_fp16")]; tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_1918_cast_fp16)[name = string("transpose_74")]; tensor var_1943_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1943_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1954_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1954_cast_fp16")]; bool var_1956_interleave_0 = const()[name = string("op_1956_interleave_0"), val = bool(false)]; tensor var_1956_cast_fp16 = concat(axis = var_1859, interleave = var_1956_interleave_0, values = (var_1954_cast_fp16, x1_39_cast_fp16))[name = string("op_1956_cast_fp16")]; tensor var_1957_cast_fp16 = mul(x = var_1956_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1957_cast_fp16")]; tensor hidden_states_441_cast_fp16 = add(x = var_1943_cast_fp16, y = var_1957_cast_fp16)[name = string("hidden_states_441_cast_fp16")]; tensor var_1959_shape_cast_fp16 = shape(x = hidden_states_441_cast_fp16)[name = string("op_1959_shape_cast_fp16")]; int32 gather_134 = const()[name = string("gather_134"), val = int32(1)]; int32 gather_135 = const()[name = string("gather_135"), val = int32(8)]; int32 gather_136_axis_0 = const()[name = string("gather_136_axis_0"), val = int32(0)]; int32 gather_136_batch_dims_0 = const()[name = string("gather_136_batch_dims_0"), val = int32(0)]; bool gather_136_validate_indices_0 = const()[name = string("gather_136_validate_indices_0"), val = bool(false)]; string var_1959_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1959_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_136_indices_0_to_uint16 = const()[name = string("gather_136_indices_0_to_uint16"), val = uint16(2)]; tensor var_1959_shape_cast_fp16_to_uint16 = cast(dtype = var_1959_shape_cast_fp16_to_uint16_dtype_0, x = var_1959_shape_cast_fp16)[name = string("cast_337")]; uint16 gather_136_cast_uint16 = gather(axis = gather_136_axis_0, batch_dims = gather_136_batch_dims_0, indices = gather_136_indices_0_to_uint16, validate_indices = gather_136_validate_indices_0, x = var_1959_shape_cast_fp16_to_uint16)[name = string("gather_136_cast_uint16")]; string gather_136_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_136_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_137 = const()[name = string("gather_137"), val = int32(128)]; tensor var_1966_axes_0 = const()[name = string("op_1966_axes_0"), val = tensor([2])]; tensor var_1966_cast_fp16 = expand_dims(axes = var_1966_axes_0, x = hidden_states_441_cast_fp16)[name = string("op_1966_cast_fp16")]; int32 concat_77_axis_0 = const()[name = string("concat_77_axis_0"), val = int32(0)]; bool concat_77_interleave_0 = const()[name = string("concat_77_interleave_0"), val = bool(false)]; int32 gather_136_cast_uint16_to_int32 = cast(dtype = gather_136_cast_uint16_to_int32_dtype_0, x = gather_136_cast_uint16)[name = string("cast_336")]; tensor concat_77 = concat(axis = concat_77_axis_0, interleave = concat_77_interleave_0, values = (gather_134, gather_135, var_1858, gather_136_cast_uint16_to_int32, gather_137))[name = string("concat_77")]; tensor shape_18_cast_fp16 = shape(x = var_1966_cast_fp16)[name = string("shape_18_cast_fp16")]; int32 equal_18_y_0 = const()[name = string("equal_18_y_0"), val = int32(-1)]; tensor equal_18 = equal(x = concat_77, y = equal_18_y_0)[name = string("equal_18")]; tensor select_18 = select(a = shape_18_cast_fp16, b = concat_77, cond = equal_18)[name = string("select_18")]; tensor real_div_18 = real_div(x = select_18, y = shape_18_cast_fp16)[name = string("real_div_18")]; tensor hidden_states_443_cast_fp16 = tile(reps = real_div_18, x = var_1966_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, 16, -1, 128])]; tensor key_states_19_cast_fp16 = reshape(shape = concat_78x, x = hidden_states_443_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor hidden_states_445_cast_fp16 = transpose(perm = hidden_states_445_perm_0, x = var_1923_cast_fp16)[name = string("transpose_73")]; tensor var_1976_shape_cast_fp16 = shape(x = hidden_states_445_cast_fp16)[name = string("op_1976_shape_cast_fp16")]; int32 gather_138 = const()[name = string("gather_138"), val = int32(1)]; int32 gather_139 = const()[name = string("gather_139"), val = int32(8)]; int32 gather_140_axis_0 = const()[name = string("gather_140_axis_0"), val = int32(0)]; int32 gather_140_batch_dims_0 = const()[name = string("gather_140_batch_dims_0"), val = int32(0)]; bool gather_140_validate_indices_0 = const()[name = string("gather_140_validate_indices_0"), val = bool(false)]; string var_1976_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1976_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_140_indices_0_to_uint16 = const()[name = string("gather_140_indices_0_to_uint16"), val = uint16(2)]; tensor var_1976_shape_cast_fp16_to_uint16 = cast(dtype = var_1976_shape_cast_fp16_to_uint16_dtype_0, x = var_1976_shape_cast_fp16)[name = string("cast_335")]; uint16 gather_140_cast_uint16 = gather(axis = gather_140_axis_0, batch_dims = gather_140_batch_dims_0, indices = gather_140_indices_0_to_uint16, validate_indices = gather_140_validate_indices_0, x = var_1976_shape_cast_fp16_to_uint16)[name = string("gather_140_cast_uint16")]; string gather_140_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_140_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_141 = const()[name = string("gather_141"), val = int32(128)]; tensor var_1983_axes_0 = const()[name = string("op_1983_axes_0"), val = tensor([2])]; tensor var_1983_cast_fp16 = expand_dims(axes = var_1983_axes_0, x = hidden_states_445_cast_fp16)[name = string("op_1983_cast_fp16")]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; int32 gather_140_cast_uint16_to_int32 = cast(dtype = gather_140_cast_uint16_to_int32_dtype_0, x = gather_140_cast_uint16)[name = string("cast_334")]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (gather_138, gather_139, var_1858, gather_140_cast_uint16_to_int32, gather_141))[name = string("concat_79")]; tensor shape_19_cast_fp16 = shape(x = var_1983_cast_fp16)[name = string("shape_19_cast_fp16")]; int32 equal_19_y_0 = const()[name = string("equal_19_y_0"), val = int32(-1)]; tensor equal_19 = equal(x = concat_79, y = equal_19_y_0)[name = string("equal_19")]; tensor select_19 = select(a = shape_19_cast_fp16, b = concat_79, cond = equal_19)[name = string("select_19")]; tensor real_div_19 = real_div(x = select_19, y = shape_19_cast_fp16)[name = string("real_div_19")]; tensor hidden_states_447_cast_fp16 = tile(reps = real_div_19, x = var_1983_cast_fp16)[name = string("hidden_states_447_cast_fp16")]; tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, 16, -1, 128])]; tensor value_states_19_cast_fp16 = reshape(shape = concat_80x, x = hidden_states_447_cast_fp16)[name = string("value_states_19_cast_fp16")]; bool var_1994_transpose_x_1 = const()[name = string("op_1994_transpose_x_1"), val = bool(false)]; bool var_1994_transpose_y_1 = const()[name = string("op_1994_transpose_y_1"), val = bool(true)]; tensor var_1994_cast_fp16 = matmul(transpose_x = var_1994_transpose_x_1, transpose_y = var_1994_transpose_y_1, x = query_19_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1994_cast_fp16")]; fp16 var_1995_to_fp16 = const()[name = string("op_1995_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_1994_cast_fp16, y = var_1995_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor input_111_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_111_cast_fp16")]; tensor var_1998_cast_fp16 = softmax(axis = var_1859, x = input_111_cast_fp16)[name = string("op_1998_cast_fp16")]; bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_1998_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_2002_perm_0 = const()[name = string("op_2002_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; int32 gather_129_cast_uint16_to_int32 = cast(dtype = gather_129_cast_uint16_to_int32_dtype_0, x = gather_129_cast_uint16)[name = string("cast_338")]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_128, gather_129_cast_uint16_to_int32, var_1859))[name = string("concat_81")]; tensor var_2002_cast_fp16 = transpose(perm = var_2002_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_72")]; tensor var_2005_cast_fp16 = reshape(shape = concat_81, x = var_2002_cast_fp16)[name = string("op_2005_cast_fp16")]; tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(602185792)))]; tensor linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = var_2005_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor hidden_states_451_cast_fp16 = add(x = hidden_states_415_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_451_cast_fp16")]; fp16 var_1858_promoted_3_to_fp16 = const()[name = string("op_1858_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2012_cast_fp16 = pow(x = hidden_states_451_cast_fp16, y = var_1858_promoted_3_to_fp16)[name = string("op_2012_cast_fp16")]; tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_2012_cast_fp16)[name = string("variance_79_cast_fp16")]; fp16 var_2015_to_fp16 = const()[name = string("op_2015_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2016_cast_fp16 = add(x = variance_79_cast_fp16, y = var_2015_to_fp16)[name = string("op_2016_cast_fp16")]; fp32 var_2017_epsilon_0 = const()[name = string("op_2017_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2017_cast_fp16 = rsqrt(epsilon = var_2017_epsilon_0, x = var_2016_cast_fp16)[name = string("op_2017_cast_fp16")]; tensor hidden_states_455_cast_fp16 = mul(x = hidden_states_451_cast_fp16, y = var_2017_cast_fp16)[name = string("hidden_states_455_cast_fp16")]; tensor layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606380160)))]; tensor input_117_cast_fp16 = mul(x = layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_455_cast_fp16)[name = string("input_117_cast_fp16")]; tensor layers_9_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_9_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606382272)))]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_gate_proj_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_2029_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_2029_cast_fp16")]; tensor layers_9_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_9_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612673792)))]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_up_proj_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_121_cast_fp16 = mul(x = var_2029_cast_fp16, y = linear_68_cast_fp16)[name = string("input_121_cast_fp16")]; tensor layers_9_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_9_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618965312)))]; tensor linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_mlp_down_proj_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor hidden_states_461_cast_fp16 = add(x = hidden_states_451_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(2)]; int32 var_2047 = const()[name = string("op_2047"), val = int32(-1)]; fp16 var_2046_promoted_to_fp16 = const()[name = string("op_2046_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2056_cast_fp16 = pow(x = hidden_states_461_cast_fp16, y = var_2046_promoted_to_fp16)[name = string("op_2056_cast_fp16")]; tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_2056_cast_fp16)[name = string("variance_81_cast_fp16")]; fp16 var_2059_to_fp16 = const()[name = string("op_2059_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2060_cast_fp16 = add(x = variance_81_cast_fp16, y = var_2059_to_fp16)[name = string("op_2060_cast_fp16")]; fp32 var_2061_epsilon_0 = const()[name = string("op_2061_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2061_cast_fp16 = rsqrt(epsilon = var_2061_epsilon_0, x = var_2060_cast_fp16)[name = string("op_2061_cast_fp16")]; tensor hidden_states_465_cast_fp16 = mul(x = hidden_states_461_cast_fp16, y = var_2061_cast_fp16)[name = string("hidden_states_465_cast_fp16")]; tensor layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625256832)))]; tensor hidden_states_469_cast_fp16 = mul(x = layers_10_input_layernorm_weight_to_fp16, y = hidden_states_465_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; tensor var_2074_shape_cast_fp16 = shape(x = hidden_states_469_cast_fp16)[name = string("op_2074_shape_cast_fp16")]; int32 gather_142 = const()[name = string("gather_142"), val = int32(1)]; int32 gather_143_axis_0 = const()[name = string("gather_143_axis_0"), val = int32(0)]; int32 gather_143_batch_dims_0 = const()[name = string("gather_143_batch_dims_0"), val = int32(0)]; bool gather_143_validate_indices_0 = const()[name = string("gather_143_validate_indices_0"), val = bool(false)]; string var_2074_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2074_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_143_indices_0_to_uint16 = const()[name = string("gather_143_indices_0_to_uint16"), val = uint16(1)]; tensor var_2074_shape_cast_fp16_to_uint16 = cast(dtype = var_2074_shape_cast_fp16_to_uint16_dtype_0, x = var_2074_shape_cast_fp16)[name = string("cast_333")]; uint16 gather_143_cast_uint16 = gather(axis = gather_143_axis_0, batch_dims = gather_143_batch_dims_0, indices = gather_143_indices_0_to_uint16, validate_indices = gather_143_validate_indices_0, x = var_2074_shape_cast_fp16_to_uint16)[name = string("gather_143_cast_uint16")]; string gather_143_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_143_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625258944)))]; tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = hidden_states_469_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor concat_82x = const()[name = string("concat_82x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_471_cast_fp16 = reshape(shape = concat_82x, x = linear_70_cast_fp16)[name = string("hidden_states_471_cast_fp16")]; fp16 var_2046_promoted_1_to_fp16 = const()[name = string("op_2046_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_2082_cast_fp16 = pow(x = hidden_states_471_cast_fp16, y = var_2046_promoted_1_to_fp16)[name = string("op_2082_cast_fp16")]; tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_2082_cast_fp16)[name = string("variance_83_cast_fp16")]; fp16 var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2086_cast_fp16 = add(x = variance_83_cast_fp16, y = var_2085_to_fp16)[name = string("op_2086_cast_fp16")]; fp32 var_2087_epsilon_0 = const()[name = string("op_2087_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2087_cast_fp16 = rsqrt(epsilon = var_2087_epsilon_0, x = var_2086_cast_fp16)[name = string("op_2087_cast_fp16")]; tensor hidden_states_475_cast_fp16 = mul(x = hidden_states_471_cast_fp16, y = var_2087_cast_fp16)[name = string("hidden_states_475_cast_fp16")]; tensor layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629453312)))]; tensor var_2090_cast_fp16 = mul(x = layers_10_self_attn_q_norm_weight_to_fp16, y = hidden_states_475_cast_fp16)[name = string("op_2090_cast_fp16")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629453632)))]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = hidden_states_469_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_479_cast_fp16 = reshape(shape = concat_83x, x = linear_71_cast_fp16)[name = string("hidden_states_479_cast_fp16")]; fp16 var_2046_promoted_2_to_fp16 = const()[name = string("op_2046_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_2098_cast_fp16 = pow(x = hidden_states_479_cast_fp16, y = var_2046_promoted_2_to_fp16)[name = string("op_2098_cast_fp16")]; tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_2098_cast_fp16)[name = string("variance_85_cast_fp16")]; fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2102_cast_fp16 = add(x = variance_85_cast_fp16, y = var_2101_to_fp16)[name = string("op_2102_cast_fp16")]; fp32 var_2103_epsilon_0 = const()[name = string("op_2103_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2103_cast_fp16 = rsqrt(epsilon = var_2103_epsilon_0, x = var_2102_cast_fp16)[name = string("op_2103_cast_fp16")]; tensor hidden_states_483_cast_fp16 = mul(x = hidden_states_479_cast_fp16, y = var_2103_cast_fp16)[name = string("hidden_states_483_cast_fp16")]; tensor layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631550848)))]; tensor var_2106_cast_fp16 = mul(x = layers_10_self_attn_k_norm_weight_to_fp16, y = hidden_states_483_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631551168)))]; tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = hidden_states_469_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor concat_84x = const()[name = string("concat_84x"), val = tensor([1, -1, 8, 128])]; tensor var_2111_cast_fp16 = reshape(shape = concat_84x, x = linear_72_cast_fp16)[name = string("op_2111_cast_fp16")]; tensor hidden_states_491_perm_0 = const()[name = string("hidden_states_491_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_2090_cast_fp16)[name = string("transpose_71")]; tensor var_2115_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2115_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2126_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2126_cast_fp16")]; bool var_2128_interleave_0 = const()[name = string("op_2128_interleave_0"), val = bool(false)]; tensor var_2128_cast_fp16 = concat(axis = var_2047, interleave = var_2128_interleave_0, values = (var_2126_cast_fp16, x1_41_cast_fp16))[name = string("op_2128_cast_fp16")]; tensor var_2129_cast_fp16 = mul(x = var_2128_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2129_cast_fp16")]; tensor query_21_cast_fp16 = add(x = var_2115_cast_fp16, y = var_2129_cast_fp16)[name = string("query_21_cast_fp16")]; tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_2106_cast_fp16)[name = string("transpose_70")]; tensor var_2131_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2131_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2142_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2142_cast_fp16")]; bool var_2144_interleave_0 = const()[name = string("op_2144_interleave_0"), val = bool(false)]; tensor var_2144_cast_fp16 = concat(axis = var_2047, interleave = var_2144_interleave_0, values = (var_2142_cast_fp16, x1_43_cast_fp16))[name = string("op_2144_cast_fp16")]; tensor var_2145_cast_fp16 = mul(x = var_2144_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2145_cast_fp16")]; tensor hidden_states_487_cast_fp16 = add(x = var_2131_cast_fp16, y = var_2145_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; tensor var_2147_shape_cast_fp16 = shape(x = hidden_states_487_cast_fp16)[name = string("op_2147_shape_cast_fp16")]; int32 gather_148 = const()[name = string("gather_148"), val = int32(1)]; int32 gather_149 = const()[name = string("gather_149"), val = int32(8)]; int32 gather_150_axis_0 = const()[name = string("gather_150_axis_0"), val = int32(0)]; int32 gather_150_batch_dims_0 = const()[name = string("gather_150_batch_dims_0"), val = int32(0)]; bool gather_150_validate_indices_0 = const()[name = string("gather_150_validate_indices_0"), val = bool(false)]; string var_2147_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2147_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_150_indices_0_to_uint16 = const()[name = string("gather_150_indices_0_to_uint16"), val = uint16(2)]; tensor var_2147_shape_cast_fp16_to_uint16 = cast(dtype = var_2147_shape_cast_fp16_to_uint16_dtype_0, x = var_2147_shape_cast_fp16)[name = string("cast_331")]; uint16 gather_150_cast_uint16 = gather(axis = gather_150_axis_0, batch_dims = gather_150_batch_dims_0, indices = gather_150_indices_0_to_uint16, validate_indices = gather_150_validate_indices_0, x = var_2147_shape_cast_fp16_to_uint16)[name = string("gather_150_cast_uint16")]; string gather_150_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_150_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_151 = const()[name = string("gather_151"), val = int32(128)]; tensor var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor([2])]; tensor var_2154_cast_fp16 = expand_dims(axes = var_2154_axes_0, x = hidden_states_487_cast_fp16)[name = string("op_2154_cast_fp16")]; int32 concat_85_axis_0 = const()[name = string("concat_85_axis_0"), val = int32(0)]; bool concat_85_interleave_0 = const()[name = string("concat_85_interleave_0"), val = bool(false)]; int32 gather_150_cast_uint16_to_int32 = cast(dtype = gather_150_cast_uint16_to_int32_dtype_0, x = gather_150_cast_uint16)[name = string("cast_330")]; tensor concat_85 = concat(axis = concat_85_axis_0, interleave = concat_85_interleave_0, values = (gather_148, gather_149, var_2046, gather_150_cast_uint16_to_int32, gather_151))[name = string("concat_85")]; tensor shape_20_cast_fp16 = shape(x = var_2154_cast_fp16)[name = string("shape_20_cast_fp16")]; int32 equal_20_y_0 = const()[name = string("equal_20_y_0"), val = int32(-1)]; tensor equal_20 = equal(x = concat_85, y = equal_20_y_0)[name = string("equal_20")]; tensor select_20 = select(a = shape_20_cast_fp16, b = concat_85, cond = equal_20)[name = string("select_20")]; tensor real_div_20 = real_div(x = select_20, y = shape_20_cast_fp16)[name = string("real_div_20")]; tensor hidden_states_489_cast_fp16 = tile(reps = real_div_20, x = var_2154_cast_fp16)[name = string("hidden_states_489_cast_fp16")]; tensor concat_86x = const()[name = string("concat_86x"), val = tensor([1, 16, -1, 128])]; tensor key_states_21_cast_fp16 = reshape(shape = concat_86x, x = hidden_states_489_cast_fp16)[name = string("key_states_21_cast_fp16")]; tensor hidden_states_491_cast_fp16 = transpose(perm = hidden_states_491_perm_0, x = var_2111_cast_fp16)[name = string("transpose_69")]; tensor var_2164_shape_cast_fp16 = shape(x = hidden_states_491_cast_fp16)[name = string("op_2164_shape_cast_fp16")]; int32 gather_152 = const()[name = string("gather_152"), val = int32(1)]; int32 gather_153 = const()[name = string("gather_153"), val = int32(8)]; int32 gather_154_axis_0 = const()[name = string("gather_154_axis_0"), val = int32(0)]; int32 gather_154_batch_dims_0 = const()[name = string("gather_154_batch_dims_0"), val = int32(0)]; bool gather_154_validate_indices_0 = const()[name = string("gather_154_validate_indices_0"), val = bool(false)]; string var_2164_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2164_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_154_indices_0_to_uint16 = const()[name = string("gather_154_indices_0_to_uint16"), val = uint16(2)]; tensor var_2164_shape_cast_fp16_to_uint16 = cast(dtype = var_2164_shape_cast_fp16_to_uint16_dtype_0, x = var_2164_shape_cast_fp16)[name = string("cast_329")]; uint16 gather_154_cast_uint16 = gather(axis = gather_154_axis_0, batch_dims = gather_154_batch_dims_0, indices = gather_154_indices_0_to_uint16, validate_indices = gather_154_validate_indices_0, x = var_2164_shape_cast_fp16_to_uint16)[name = string("gather_154_cast_uint16")]; string gather_154_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_154_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_155 = const()[name = string("gather_155"), val = int32(128)]; tensor var_2171_axes_0 = const()[name = string("op_2171_axes_0"), val = tensor([2])]; tensor var_2171_cast_fp16 = expand_dims(axes = var_2171_axes_0, x = hidden_states_491_cast_fp16)[name = string("op_2171_cast_fp16")]; int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; int32 gather_154_cast_uint16_to_int32 = cast(dtype = gather_154_cast_uint16_to_int32_dtype_0, x = gather_154_cast_uint16)[name = string("cast_328")]; tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (gather_152, gather_153, var_2046, gather_154_cast_uint16_to_int32, gather_155))[name = string("concat_87")]; tensor shape_21_cast_fp16 = shape(x = var_2171_cast_fp16)[name = string("shape_21_cast_fp16")]; int32 equal_21_y_0 = const()[name = string("equal_21_y_0"), val = int32(-1)]; tensor equal_21 = equal(x = concat_87, y = equal_21_y_0)[name = string("equal_21")]; tensor select_21 = select(a = shape_21_cast_fp16, b = concat_87, cond = equal_21)[name = string("select_21")]; tensor real_div_21 = real_div(x = select_21, y = shape_21_cast_fp16)[name = string("real_div_21")]; tensor hidden_states_493_cast_fp16 = tile(reps = real_div_21, x = var_2171_cast_fp16)[name = string("hidden_states_493_cast_fp16")]; tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, 16, -1, 128])]; tensor value_states_21_cast_fp16 = reshape(shape = concat_88x, x = hidden_states_493_cast_fp16)[name = string("value_states_21_cast_fp16")]; bool var_2182_transpose_x_1 = const()[name = string("op_2182_transpose_x_1"), val = bool(false)]; bool var_2182_transpose_y_1 = const()[name = string("op_2182_transpose_y_1"), val = bool(true)]; tensor var_2182_cast_fp16 = matmul(transpose_x = var_2182_transpose_x_1, transpose_y = var_2182_transpose_y_1, x = query_21_cast_fp16, y = key_states_21_cast_fp16)[name = string("op_2182_cast_fp16")]; fp16 var_2183_to_fp16 = const()[name = string("op_2183_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_41_cast_fp16 = mul(x = var_2182_cast_fp16, y = var_2183_to_fp16)[name = string("attn_weights_41_cast_fp16")]; tensor input_123_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_123_cast_fp16")]; tensor var_2186_cast_fp16 = softmax(axis = var_2047, x = input_123_cast_fp16)[name = string("op_2186_cast_fp16")]; bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = var_2186_cast_fp16, y = value_states_21_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_2190_perm_0 = const()[name = string("op_2190_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; int32 gather_143_cast_uint16_to_int32 = cast(dtype = gather_143_cast_uint16_to_int32_dtype_0, x = gather_143_cast_uint16)[name = string("cast_332")]; tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_142, gather_143_cast_uint16_to_int32, var_2047))[name = string("concat_89")]; tensor var_2190_cast_fp16 = transpose(perm = var_2190_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_68")]; tensor var_2193_cast_fp16 = reshape(shape = concat_89, x = var_2190_cast_fp16)[name = string("op_2193_cast_fp16")]; tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633648384)))]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = var_2193_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor hidden_states_497_cast_fp16 = add(x = hidden_states_461_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_497_cast_fp16")]; fp16 var_2046_promoted_3_to_fp16 = const()[name = string("op_2046_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2200_cast_fp16 = pow(x = hidden_states_497_cast_fp16, y = var_2046_promoted_3_to_fp16)[name = string("op_2200_cast_fp16")]; tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_2200_cast_fp16)[name = string("variance_87_cast_fp16")]; fp16 var_2203_to_fp16 = const()[name = string("op_2203_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2204_cast_fp16 = add(x = variance_87_cast_fp16, y = var_2203_to_fp16)[name = string("op_2204_cast_fp16")]; fp32 var_2205_epsilon_0 = const()[name = string("op_2205_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2205_cast_fp16 = rsqrt(epsilon = var_2205_epsilon_0, x = var_2204_cast_fp16)[name = string("op_2205_cast_fp16")]; tensor hidden_states_501_cast_fp16 = mul(x = hidden_states_497_cast_fp16, y = var_2205_cast_fp16)[name = string("hidden_states_501_cast_fp16")]; tensor layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(637842752)))]; tensor input_129_cast_fp16 = mul(x = layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_501_cast_fp16)[name = string("input_129_cast_fp16")]; tensor layers_10_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_10_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(637844864)))]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_gate_proj_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2217_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2217_cast_fp16")]; tensor layers_10_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_10_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(644136384)))]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_up_proj_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_133_cast_fp16 = mul(x = var_2217_cast_fp16, y = linear_75_cast_fp16)[name = string("input_133_cast_fp16")]; tensor layers_10_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_10_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650427904)))]; tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_mlp_down_proj_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor hidden_states_507_cast_fp16 = add(x = hidden_states_497_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_507_cast_fp16")]; int32 var_2234 = const()[name = string("op_2234"), val = int32(2)]; int32 var_2235 = const()[name = string("op_2235"), val = int32(-1)]; fp16 var_2234_promoted_to_fp16 = const()[name = string("op_2234_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2244_cast_fp16 = pow(x = hidden_states_507_cast_fp16, y = var_2234_promoted_to_fp16)[name = string("op_2244_cast_fp16")]; tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_2244_cast_fp16)[name = string("variance_89_cast_fp16")]; fp16 var_2247_to_fp16 = const()[name = string("op_2247_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2248_cast_fp16 = add(x = variance_89_cast_fp16, y = var_2247_to_fp16)[name = string("op_2248_cast_fp16")]; fp32 var_2249_epsilon_0 = const()[name = string("op_2249_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2249_cast_fp16 = rsqrt(epsilon = var_2249_epsilon_0, x = var_2248_cast_fp16)[name = string("op_2249_cast_fp16")]; tensor hidden_states_511_cast_fp16 = mul(x = hidden_states_507_cast_fp16, y = var_2249_cast_fp16)[name = string("hidden_states_511_cast_fp16")]; tensor layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656719424)))]; tensor hidden_states_515_cast_fp16 = mul(x = layers_11_input_layernorm_weight_to_fp16, y = hidden_states_511_cast_fp16)[name = string("hidden_states_515_cast_fp16")]; tensor var_2262_shape_cast_fp16 = shape(x = hidden_states_515_cast_fp16)[name = string("op_2262_shape_cast_fp16")]; int32 gather_156 = const()[name = string("gather_156"), val = int32(1)]; int32 gather_157_axis_0 = const()[name = string("gather_157_axis_0"), val = int32(0)]; int32 gather_157_batch_dims_0 = const()[name = string("gather_157_batch_dims_0"), val = int32(0)]; bool gather_157_validate_indices_0 = const()[name = string("gather_157_validate_indices_0"), val = bool(false)]; string var_2262_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2262_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_157_indices_0_to_uint16 = const()[name = string("gather_157_indices_0_to_uint16"), val = uint16(1)]; tensor var_2262_shape_cast_fp16_to_uint16 = cast(dtype = var_2262_shape_cast_fp16_to_uint16_dtype_0, x = var_2262_shape_cast_fp16)[name = string("cast_327")]; uint16 gather_157_cast_uint16 = gather(axis = gather_157_axis_0, batch_dims = gather_157_batch_dims_0, indices = gather_157_indices_0_to_uint16, validate_indices = gather_157_validate_indices_0, x = var_2262_shape_cast_fp16_to_uint16)[name = string("gather_157_cast_uint16")]; string gather_157_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_157_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656721536)))]; tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = hidden_states_515_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_517_cast_fp16 = reshape(shape = concat_90x, x = linear_77_cast_fp16)[name = string("hidden_states_517_cast_fp16")]; fp16 var_2234_promoted_1_to_fp16 = const()[name = string("op_2234_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_2270_cast_fp16 = pow(x = hidden_states_517_cast_fp16, y = var_2234_promoted_1_to_fp16)[name = string("op_2270_cast_fp16")]; tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_2270_cast_fp16)[name = string("variance_91_cast_fp16")]; fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2274_cast_fp16 = add(x = variance_91_cast_fp16, y = var_2273_to_fp16)[name = string("op_2274_cast_fp16")]; fp32 var_2275_epsilon_0 = const()[name = string("op_2275_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2275_cast_fp16 = rsqrt(epsilon = var_2275_epsilon_0, x = var_2274_cast_fp16)[name = string("op_2275_cast_fp16")]; tensor hidden_states_521_cast_fp16 = mul(x = hidden_states_517_cast_fp16, y = var_2275_cast_fp16)[name = string("hidden_states_521_cast_fp16")]; tensor layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660915904)))]; tensor var_2278_cast_fp16 = mul(x = layers_11_self_attn_q_norm_weight_to_fp16, y = hidden_states_521_cast_fp16)[name = string("op_2278_cast_fp16")]; tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660916224)))]; tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = hidden_states_515_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor concat_91x = const()[name = string("concat_91x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_525_cast_fp16 = reshape(shape = concat_91x, x = linear_78_cast_fp16)[name = string("hidden_states_525_cast_fp16")]; fp16 var_2234_promoted_2_to_fp16 = const()[name = string("op_2234_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_2286_cast_fp16 = pow(x = hidden_states_525_cast_fp16, y = var_2234_promoted_2_to_fp16)[name = string("op_2286_cast_fp16")]; tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_2286_cast_fp16)[name = string("variance_93_cast_fp16")]; fp16 var_2289_to_fp16 = const()[name = string("op_2289_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2290_cast_fp16 = add(x = variance_93_cast_fp16, y = var_2289_to_fp16)[name = string("op_2290_cast_fp16")]; fp32 var_2291_epsilon_0 = const()[name = string("op_2291_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2291_cast_fp16 = rsqrt(epsilon = var_2291_epsilon_0, x = var_2290_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor hidden_states_529_cast_fp16 = mul(x = hidden_states_525_cast_fp16, y = var_2291_cast_fp16)[name = string("hidden_states_529_cast_fp16")]; tensor layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663013440)))]; tensor var_2294_cast_fp16 = mul(x = layers_11_self_attn_k_norm_weight_to_fp16, y = hidden_states_529_cast_fp16)[name = string("op_2294_cast_fp16")]; tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663013760)))]; tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = hidden_states_515_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, -1, 8, 128])]; tensor var_2299_cast_fp16 = reshape(shape = concat_92x, x = linear_79_cast_fp16)[name = string("op_2299_cast_fp16")]; tensor hidden_states_537_perm_0 = const()[name = string("hidden_states_537_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_2278_cast_fp16)[name = string("transpose_67")]; tensor var_2303_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2303_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2314_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_2314_cast_fp16")]; bool var_2316_interleave_0 = const()[name = string("op_2316_interleave_0"), val = bool(false)]; tensor var_2316_cast_fp16 = concat(axis = var_2235, interleave = var_2316_interleave_0, values = (var_2314_cast_fp16, x1_45_cast_fp16))[name = string("op_2316_cast_fp16")]; tensor var_2317_cast_fp16 = mul(x = var_2316_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2317_cast_fp16")]; tensor query_23_cast_fp16 = add(x = var_2303_cast_fp16, y = var_2317_cast_fp16)[name = string("query_23_cast_fp16")]; tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_2294_cast_fp16)[name = string("transpose_66")]; tensor var_2319_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2330_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_2330_cast_fp16")]; bool var_2332_interleave_0 = const()[name = string("op_2332_interleave_0"), val = bool(false)]; tensor var_2332_cast_fp16 = concat(axis = var_2235, interleave = var_2332_interleave_0, values = (var_2330_cast_fp16, x1_47_cast_fp16))[name = string("op_2332_cast_fp16")]; tensor var_2333_cast_fp16 = mul(x = var_2332_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2333_cast_fp16")]; tensor hidden_states_533_cast_fp16 = add(x = var_2319_cast_fp16, y = var_2333_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; tensor var_2335_shape_cast_fp16 = shape(x = hidden_states_533_cast_fp16)[name = string("op_2335_shape_cast_fp16")]; int32 gather_162 = const()[name = string("gather_162"), val = int32(1)]; int32 gather_163 = const()[name = string("gather_163"), val = int32(8)]; int32 gather_164_axis_0 = const()[name = string("gather_164_axis_0"), val = int32(0)]; int32 gather_164_batch_dims_0 = const()[name = string("gather_164_batch_dims_0"), val = int32(0)]; bool gather_164_validate_indices_0 = const()[name = string("gather_164_validate_indices_0"), val = bool(false)]; string var_2335_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2335_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_164_indices_0_to_uint16 = const()[name = string("gather_164_indices_0_to_uint16"), val = uint16(2)]; tensor var_2335_shape_cast_fp16_to_uint16 = cast(dtype = var_2335_shape_cast_fp16_to_uint16_dtype_0, x = var_2335_shape_cast_fp16)[name = string("cast_325")]; uint16 gather_164_cast_uint16 = gather(axis = gather_164_axis_0, batch_dims = gather_164_batch_dims_0, indices = gather_164_indices_0_to_uint16, validate_indices = gather_164_validate_indices_0, x = var_2335_shape_cast_fp16_to_uint16)[name = string("gather_164_cast_uint16")]; string gather_164_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_164_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_165 = const()[name = string("gather_165"), val = int32(128)]; tensor var_2342_axes_0 = const()[name = string("op_2342_axes_0"), val = tensor([2])]; tensor var_2342_cast_fp16 = expand_dims(axes = var_2342_axes_0, x = hidden_states_533_cast_fp16)[name = string("op_2342_cast_fp16")]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; int32 gather_164_cast_uint16_to_int32 = cast(dtype = gather_164_cast_uint16_to_int32_dtype_0, x = gather_164_cast_uint16)[name = string("cast_324")]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (gather_162, gather_163, var_2234, gather_164_cast_uint16_to_int32, gather_165))[name = string("concat_93")]; tensor shape_22_cast_fp16 = shape(x = var_2342_cast_fp16)[name = string("shape_22_cast_fp16")]; int32 equal_22_y_0 = const()[name = string("equal_22_y_0"), val = int32(-1)]; tensor equal_22 = equal(x = concat_93, y = equal_22_y_0)[name = string("equal_22")]; tensor select_22 = select(a = shape_22_cast_fp16, b = concat_93, cond = equal_22)[name = string("select_22")]; tensor real_div_22 = real_div(x = select_22, y = shape_22_cast_fp16)[name = string("real_div_22")]; tensor hidden_states_535_cast_fp16 = tile(reps = real_div_22, x = var_2342_cast_fp16)[name = string("hidden_states_535_cast_fp16")]; tensor concat_94x = const()[name = string("concat_94x"), val = tensor([1, 16, -1, 128])]; tensor key_states_23_cast_fp16 = reshape(shape = concat_94x, x = hidden_states_535_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor hidden_states_537_cast_fp16 = transpose(perm = hidden_states_537_perm_0, x = var_2299_cast_fp16)[name = string("transpose_65")]; tensor var_2352_shape_cast_fp16 = shape(x = hidden_states_537_cast_fp16)[name = string("op_2352_shape_cast_fp16")]; int32 gather_166 = const()[name = string("gather_166"), val = int32(1)]; int32 gather_167 = const()[name = string("gather_167"), val = int32(8)]; int32 gather_168_axis_0 = const()[name = string("gather_168_axis_0"), val = int32(0)]; int32 gather_168_batch_dims_0 = const()[name = string("gather_168_batch_dims_0"), val = int32(0)]; bool gather_168_validate_indices_0 = const()[name = string("gather_168_validate_indices_0"), val = bool(false)]; string var_2352_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2352_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_168_indices_0_to_uint16 = const()[name = string("gather_168_indices_0_to_uint16"), val = uint16(2)]; tensor var_2352_shape_cast_fp16_to_uint16 = cast(dtype = var_2352_shape_cast_fp16_to_uint16_dtype_0, x = var_2352_shape_cast_fp16)[name = string("cast_323")]; uint16 gather_168_cast_uint16 = gather(axis = gather_168_axis_0, batch_dims = gather_168_batch_dims_0, indices = gather_168_indices_0_to_uint16, validate_indices = gather_168_validate_indices_0, x = var_2352_shape_cast_fp16_to_uint16)[name = string("gather_168_cast_uint16")]; string gather_168_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_168_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_169 = const()[name = string("gather_169"), val = int32(128)]; tensor var_2359_axes_0 = const()[name = string("op_2359_axes_0"), val = tensor([2])]; tensor var_2359_cast_fp16 = expand_dims(axes = var_2359_axes_0, x = hidden_states_537_cast_fp16)[name = string("op_2359_cast_fp16")]; int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; int32 gather_168_cast_uint16_to_int32 = cast(dtype = gather_168_cast_uint16_to_int32_dtype_0, x = gather_168_cast_uint16)[name = string("cast_322")]; tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (gather_166, gather_167, var_2234, gather_168_cast_uint16_to_int32, gather_169))[name = string("concat_95")]; tensor shape_23_cast_fp16 = shape(x = var_2359_cast_fp16)[name = string("shape_23_cast_fp16")]; int32 equal_23_y_0 = const()[name = string("equal_23_y_0"), val = int32(-1)]; tensor equal_23 = equal(x = concat_95, y = equal_23_y_0)[name = string("equal_23")]; tensor select_23 = select(a = shape_23_cast_fp16, b = concat_95, cond = equal_23)[name = string("select_23")]; tensor real_div_23 = real_div(x = select_23, y = shape_23_cast_fp16)[name = string("real_div_23")]; tensor hidden_states_539_cast_fp16 = tile(reps = real_div_23, x = var_2359_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, 16, -1, 128])]; tensor value_states_23_cast_fp16 = reshape(shape = concat_96x, x = hidden_states_539_cast_fp16)[name = string("value_states_23_cast_fp16")]; bool var_2370_transpose_x_1 = const()[name = string("op_2370_transpose_x_1"), val = bool(false)]; bool var_2370_transpose_y_1 = const()[name = string("op_2370_transpose_y_1"), val = bool(true)]; tensor var_2370_cast_fp16 = matmul(transpose_x = var_2370_transpose_x_1, transpose_y = var_2370_transpose_y_1, x = query_23_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_2370_cast_fp16")]; fp16 var_2371_to_fp16 = const()[name = string("op_2371_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_45_cast_fp16 = mul(x = var_2370_cast_fp16, y = var_2371_to_fp16)[name = string("attn_weights_45_cast_fp16")]; tensor input_135_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_135_cast_fp16")]; tensor var_2374_cast_fp16 = softmax(axis = var_2235, x = input_135_cast_fp16)[name = string("op_2374_cast_fp16")]; bool attn_output_45_transpose_x_0 = const()[name = string("attn_output_45_transpose_x_0"), val = bool(false)]; bool attn_output_45_transpose_y_0 = const()[name = string("attn_output_45_transpose_y_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = matmul(transpose_x = attn_output_45_transpose_x_0, transpose_y = attn_output_45_transpose_y_0, x = var_2374_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_2378_perm_0 = const()[name = string("op_2378_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; int32 gather_157_cast_uint16_to_int32 = cast(dtype = gather_157_cast_uint16_to_int32_dtype_0, x = gather_157_cast_uint16)[name = string("cast_326")]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (gather_156, gather_157_cast_uint16_to_int32, var_2235))[name = string("concat_97")]; tensor var_2378_cast_fp16 = transpose(perm = var_2378_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_64")]; tensor var_2381_cast_fp16 = reshape(shape = concat_97, x = var_2378_cast_fp16)[name = string("op_2381_cast_fp16")]; tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665110976)))]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = var_2381_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor hidden_states_543_cast_fp16 = add(x = hidden_states_507_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_543_cast_fp16")]; fp16 var_2234_promoted_3_to_fp16 = const()[name = string("op_2234_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2388_cast_fp16 = pow(x = hidden_states_543_cast_fp16, y = var_2234_promoted_3_to_fp16)[name = string("op_2388_cast_fp16")]; tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_2388_cast_fp16)[name = string("variance_95_cast_fp16")]; fp16 var_2391_to_fp16 = const()[name = string("op_2391_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2392_cast_fp16 = add(x = variance_95_cast_fp16, y = var_2391_to_fp16)[name = string("op_2392_cast_fp16")]; fp32 var_2393_epsilon_0 = const()[name = string("op_2393_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2393_cast_fp16 = rsqrt(epsilon = var_2393_epsilon_0, x = var_2392_cast_fp16)[name = string("op_2393_cast_fp16")]; tensor hidden_states_547_cast_fp16 = mul(x = hidden_states_543_cast_fp16, y = var_2393_cast_fp16)[name = string("hidden_states_547_cast_fp16")]; tensor layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669305344)))]; tensor input_141_cast_fp16 = mul(x = layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_547_cast_fp16)[name = string("input_141_cast_fp16")]; tensor layers_11_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_11_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669307456)))]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_gate_proj_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2405_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2405_cast_fp16")]; tensor layers_11_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_11_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675598976)))]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_up_proj_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_145_cast_fp16 = mul(x = var_2405_cast_fp16, y = linear_82_cast_fp16)[name = string("input_145_cast_fp16")]; tensor layers_11_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_11_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681890496)))]; tensor linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_mlp_down_proj_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor hidden_states_553_cast_fp16 = add(x = hidden_states_543_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_553_cast_fp16")]; int32 var_2422 = const()[name = string("op_2422"), val = int32(2)]; int32 var_2423 = const()[name = string("op_2423"), val = int32(-1)]; fp16 var_2422_promoted_to_fp16 = const()[name = string("op_2422_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2432_cast_fp16 = pow(x = hidden_states_553_cast_fp16, y = var_2422_promoted_to_fp16)[name = string("op_2432_cast_fp16")]; tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_2432_cast_fp16)[name = string("variance_97_cast_fp16")]; fp16 var_2435_to_fp16 = const()[name = string("op_2435_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2436_cast_fp16 = add(x = variance_97_cast_fp16, y = var_2435_to_fp16)[name = string("op_2436_cast_fp16")]; fp32 var_2437_epsilon_0 = const()[name = string("op_2437_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2437_cast_fp16 = rsqrt(epsilon = var_2437_epsilon_0, x = var_2436_cast_fp16)[name = string("op_2437_cast_fp16")]; tensor hidden_states_557_cast_fp16 = mul(x = hidden_states_553_cast_fp16, y = var_2437_cast_fp16)[name = string("hidden_states_557_cast_fp16")]; tensor layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688182016)))]; tensor hidden_states_561_cast_fp16 = mul(x = layers_12_input_layernorm_weight_to_fp16, y = hidden_states_557_cast_fp16)[name = string("hidden_states_561_cast_fp16")]; tensor var_2450_shape_cast_fp16 = shape(x = hidden_states_561_cast_fp16)[name = string("op_2450_shape_cast_fp16")]; int32 gather_170 = const()[name = string("gather_170"), val = int32(1)]; int32 gather_171_axis_0 = const()[name = string("gather_171_axis_0"), val = int32(0)]; int32 gather_171_batch_dims_0 = const()[name = string("gather_171_batch_dims_0"), val = int32(0)]; bool gather_171_validate_indices_0 = const()[name = string("gather_171_validate_indices_0"), val = bool(false)]; string var_2450_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2450_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_171_indices_0_to_uint16 = const()[name = string("gather_171_indices_0_to_uint16"), val = uint16(1)]; tensor var_2450_shape_cast_fp16_to_uint16 = cast(dtype = var_2450_shape_cast_fp16_to_uint16_dtype_0, x = var_2450_shape_cast_fp16)[name = string("cast_321")]; uint16 gather_171_cast_uint16 = gather(axis = gather_171_axis_0, batch_dims = gather_171_batch_dims_0, indices = gather_171_indices_0_to_uint16, validate_indices = gather_171_validate_indices_0, x = var_2450_shape_cast_fp16_to_uint16)[name = string("gather_171_cast_uint16")]; string gather_171_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_171_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_12_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_12_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688184128)))]; tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_12_self_attn_q_proj_weight_to_fp16, x = hidden_states_561_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor concat_98x = const()[name = string("concat_98x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_563_cast_fp16 = reshape(shape = concat_98x, x = linear_84_cast_fp16)[name = string("hidden_states_563_cast_fp16")]; fp16 var_2422_promoted_1_to_fp16 = const()[name = string("op_2422_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_2458_cast_fp16 = pow(x = hidden_states_563_cast_fp16, y = var_2422_promoted_1_to_fp16)[name = string("op_2458_cast_fp16")]; tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([-1])]; bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; tensor variance_99_cast_fp16 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = var_2458_cast_fp16)[name = string("variance_99_cast_fp16")]; fp16 var_2461_to_fp16 = const()[name = string("op_2461_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2462_cast_fp16 = add(x = variance_99_cast_fp16, y = var_2461_to_fp16)[name = string("op_2462_cast_fp16")]; fp32 var_2463_epsilon_0 = const()[name = string("op_2463_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2463_cast_fp16 = rsqrt(epsilon = var_2463_epsilon_0, x = var_2462_cast_fp16)[name = string("op_2463_cast_fp16")]; tensor hidden_states_567_cast_fp16 = mul(x = hidden_states_563_cast_fp16, y = var_2463_cast_fp16)[name = string("hidden_states_567_cast_fp16")]; tensor layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692378496)))]; tensor var_2466_cast_fp16 = mul(x = layers_12_self_attn_q_norm_weight_to_fp16, y = hidden_states_567_cast_fp16)[name = string("op_2466_cast_fp16")]; tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_12_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_12_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692378816)))]; tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_k_proj_weight_to_fp16, x = hidden_states_561_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor concat_99x = const()[name = string("concat_99x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_571_cast_fp16 = reshape(shape = concat_99x, x = linear_85_cast_fp16)[name = string("hidden_states_571_cast_fp16")]; fp16 var_2422_promoted_2_to_fp16 = const()[name = string("op_2422_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_2474_cast_fp16 = pow(x = hidden_states_571_cast_fp16, y = var_2422_promoted_2_to_fp16)[name = string("op_2474_cast_fp16")]; tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([-1])]; bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; tensor variance_101_cast_fp16 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = var_2474_cast_fp16)[name = string("variance_101_cast_fp16")]; fp16 var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2478_cast_fp16 = add(x = variance_101_cast_fp16, y = var_2477_to_fp16)[name = string("op_2478_cast_fp16")]; fp32 var_2479_epsilon_0 = const()[name = string("op_2479_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2479_cast_fp16 = rsqrt(epsilon = var_2479_epsilon_0, x = var_2478_cast_fp16)[name = string("op_2479_cast_fp16")]; tensor hidden_states_575_cast_fp16 = mul(x = hidden_states_571_cast_fp16, y = var_2479_cast_fp16)[name = string("hidden_states_575_cast_fp16")]; tensor layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694476032)))]; tensor var_2482_cast_fp16 = mul(x = layers_12_self_attn_k_norm_weight_to_fp16, y = hidden_states_575_cast_fp16)[name = string("op_2482_cast_fp16")]; tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_12_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_12_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694476352)))]; tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_v_proj_weight_to_fp16, x = hidden_states_561_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 8, 128])]; tensor var_2487_cast_fp16 = reshape(shape = concat_100x, x = linear_86_cast_fp16)[name = string("op_2487_cast_fp16")]; tensor hidden_states_583_perm_0 = const()[name = string("hidden_states_583_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_2466_cast_fp16)[name = string("transpose_63")]; tensor var_2491_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2491_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2502_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2502_cast_fp16")]; bool var_2504_interleave_0 = const()[name = string("op_2504_interleave_0"), val = bool(false)]; tensor var_2504_cast_fp16 = concat(axis = var_2423, interleave = var_2504_interleave_0, values = (var_2502_cast_fp16, x1_49_cast_fp16))[name = string("op_2504_cast_fp16")]; tensor var_2505_cast_fp16 = mul(x = var_2504_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2505_cast_fp16")]; tensor query_25_cast_fp16 = add(x = var_2491_cast_fp16, y = var_2505_cast_fp16)[name = string("query_25_cast_fp16")]; tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_2482_cast_fp16)[name = string("transpose_62")]; tensor var_2507_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2507_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2518_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2518_cast_fp16")]; bool var_2520_interleave_0 = const()[name = string("op_2520_interleave_0"), val = bool(false)]; tensor var_2520_cast_fp16 = concat(axis = var_2423, interleave = var_2520_interleave_0, values = (var_2518_cast_fp16, x1_51_cast_fp16))[name = string("op_2520_cast_fp16")]; tensor var_2521_cast_fp16 = mul(x = var_2520_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2521_cast_fp16")]; tensor hidden_states_579_cast_fp16 = add(x = var_2507_cast_fp16, y = var_2521_cast_fp16)[name = string("hidden_states_579_cast_fp16")]; tensor var_2523_shape_cast_fp16 = shape(x = hidden_states_579_cast_fp16)[name = string("op_2523_shape_cast_fp16")]; int32 gather_176 = const()[name = string("gather_176"), val = int32(1)]; int32 gather_177 = const()[name = string("gather_177"), val = int32(8)]; int32 gather_178_axis_0 = const()[name = string("gather_178_axis_0"), val = int32(0)]; int32 gather_178_batch_dims_0 = const()[name = string("gather_178_batch_dims_0"), val = int32(0)]; bool gather_178_validate_indices_0 = const()[name = string("gather_178_validate_indices_0"), val = bool(false)]; string var_2523_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2523_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_178_indices_0_to_uint16 = const()[name = string("gather_178_indices_0_to_uint16"), val = uint16(2)]; tensor var_2523_shape_cast_fp16_to_uint16 = cast(dtype = var_2523_shape_cast_fp16_to_uint16_dtype_0, x = var_2523_shape_cast_fp16)[name = string("cast_319")]; uint16 gather_178_cast_uint16 = gather(axis = gather_178_axis_0, batch_dims = gather_178_batch_dims_0, indices = gather_178_indices_0_to_uint16, validate_indices = gather_178_validate_indices_0, x = var_2523_shape_cast_fp16_to_uint16)[name = string("gather_178_cast_uint16")]; string gather_178_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_178_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_179 = const()[name = string("gather_179"), val = int32(128)]; tensor var_2530_axes_0 = const()[name = string("op_2530_axes_0"), val = tensor([2])]; tensor var_2530_cast_fp16 = expand_dims(axes = var_2530_axes_0, x = hidden_states_579_cast_fp16)[name = string("op_2530_cast_fp16")]; int32 concat_101_axis_0 = const()[name = string("concat_101_axis_0"), val = int32(0)]; bool concat_101_interleave_0 = const()[name = string("concat_101_interleave_0"), val = bool(false)]; int32 gather_178_cast_uint16_to_int32 = cast(dtype = gather_178_cast_uint16_to_int32_dtype_0, x = gather_178_cast_uint16)[name = string("cast_318")]; tensor concat_101 = concat(axis = concat_101_axis_0, interleave = concat_101_interleave_0, values = (gather_176, gather_177, var_2422, gather_178_cast_uint16_to_int32, gather_179))[name = string("concat_101")]; tensor shape_24_cast_fp16 = shape(x = var_2530_cast_fp16)[name = string("shape_24_cast_fp16")]; int32 equal_24_y_0 = const()[name = string("equal_24_y_0"), val = int32(-1)]; tensor equal_24 = equal(x = concat_101, y = equal_24_y_0)[name = string("equal_24")]; tensor select_24 = select(a = shape_24_cast_fp16, b = concat_101, cond = equal_24)[name = string("select_24")]; tensor real_div_24 = real_div(x = select_24, y = shape_24_cast_fp16)[name = string("real_div_24")]; tensor hidden_states_581_cast_fp16 = tile(reps = real_div_24, x = var_2530_cast_fp16)[name = string("hidden_states_581_cast_fp16")]; tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, 16, -1, 128])]; tensor key_states_25_cast_fp16 = reshape(shape = concat_102x, x = hidden_states_581_cast_fp16)[name = string("key_states_25_cast_fp16")]; tensor hidden_states_583_cast_fp16 = transpose(perm = hidden_states_583_perm_0, x = var_2487_cast_fp16)[name = string("transpose_61")]; tensor var_2540_shape_cast_fp16 = shape(x = hidden_states_583_cast_fp16)[name = string("op_2540_shape_cast_fp16")]; int32 gather_180 = const()[name = string("gather_180"), val = int32(1)]; int32 gather_181 = const()[name = string("gather_181"), val = int32(8)]; int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)]; int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)]; bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)]; string var_2540_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2540_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_182_indices_0_to_uint16 = const()[name = string("gather_182_indices_0_to_uint16"), val = uint16(2)]; tensor var_2540_shape_cast_fp16_to_uint16 = cast(dtype = var_2540_shape_cast_fp16_to_uint16_dtype_0, x = var_2540_shape_cast_fp16)[name = string("cast_317")]; uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = gather_182_indices_0_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_2540_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")]; string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_183 = const()[name = string("gather_183"), val = int32(128)]; tensor var_2547_axes_0 = const()[name = string("op_2547_axes_0"), val = tensor([2])]; tensor var_2547_cast_fp16 = expand_dims(axes = var_2547_axes_0, x = hidden_states_583_cast_fp16)[name = string("op_2547_cast_fp16")]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_316")]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_180, gather_181, var_2422, gather_182_cast_uint16_to_int32, gather_183))[name = string("concat_103")]; tensor shape_25_cast_fp16 = shape(x = var_2547_cast_fp16)[name = string("shape_25_cast_fp16")]; int32 equal_25_y_0 = const()[name = string("equal_25_y_0"), val = int32(-1)]; tensor equal_25 = equal(x = concat_103, y = equal_25_y_0)[name = string("equal_25")]; tensor select_25 = select(a = shape_25_cast_fp16, b = concat_103, cond = equal_25)[name = string("select_25")]; tensor real_div_25 = real_div(x = select_25, y = shape_25_cast_fp16)[name = string("real_div_25")]; tensor hidden_states_585_cast_fp16 = tile(reps = real_div_25, x = var_2547_cast_fp16)[name = string("hidden_states_585_cast_fp16")]; tensor concat_104x = const()[name = string("concat_104x"), val = tensor([1, 16, -1, 128])]; tensor value_states_25_cast_fp16 = reshape(shape = concat_104x, x = hidden_states_585_cast_fp16)[name = string("value_states_25_cast_fp16")]; bool var_2558_transpose_x_1 = const()[name = string("op_2558_transpose_x_1"), val = bool(false)]; bool var_2558_transpose_y_1 = const()[name = string("op_2558_transpose_y_1"), val = bool(true)]; tensor var_2558_cast_fp16 = matmul(transpose_x = var_2558_transpose_x_1, transpose_y = var_2558_transpose_y_1, x = query_25_cast_fp16, y = key_states_25_cast_fp16)[name = string("op_2558_cast_fp16")]; fp16 var_2559_to_fp16 = const()[name = string("op_2559_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_2558_cast_fp16, y = var_2559_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor input_147_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_147_cast_fp16")]; tensor var_2562_cast_fp16 = softmax(axis = var_2423, x = input_147_cast_fp16)[name = string("op_2562_cast_fp16")]; bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_2562_cast_fp16, y = value_states_25_cast_fp16)[name = string("attn_output_49_cast_fp16")]; tensor var_2566_perm_0 = const()[name = string("op_2566_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)]; bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)]; int32 gather_171_cast_uint16_to_int32 = cast(dtype = gather_171_cast_uint16_to_int32_dtype_0, x = gather_171_cast_uint16)[name = string("cast_320")]; tensor concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (gather_170, gather_171_cast_uint16_to_int32, var_2423))[name = string("concat_105")]; tensor var_2566_cast_fp16 = transpose(perm = var_2566_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_60")]; tensor var_2569_cast_fp16 = reshape(shape = concat_105, x = var_2566_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor layers_12_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_12_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(696573568)))]; tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_o_proj_weight_to_fp16, x = var_2569_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor hidden_states_589_cast_fp16 = add(x = hidden_states_553_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_589_cast_fp16")]; fp16 var_2422_promoted_3_to_fp16 = const()[name = string("op_2422_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2576_cast_fp16 = pow(x = hidden_states_589_cast_fp16, y = var_2422_promoted_3_to_fp16)[name = string("op_2576_cast_fp16")]; tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_2576_cast_fp16)[name = string("variance_103_cast_fp16")]; fp16 var_2579_to_fp16 = const()[name = string("op_2579_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2580_cast_fp16 = add(x = variance_103_cast_fp16, y = var_2579_to_fp16)[name = string("op_2580_cast_fp16")]; fp32 var_2581_epsilon_0 = const()[name = string("op_2581_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2581_cast_fp16 = rsqrt(epsilon = var_2581_epsilon_0, x = var_2580_cast_fp16)[name = string("op_2581_cast_fp16")]; tensor hidden_states_593_cast_fp16 = mul(x = hidden_states_589_cast_fp16, y = var_2581_cast_fp16)[name = string("hidden_states_593_cast_fp16")]; tensor layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700767936)))]; tensor input_153_cast_fp16 = mul(x = layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_593_cast_fp16)[name = string("input_153_cast_fp16")]; tensor layers_12_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_12_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700770048)))]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_gate_proj_weight_to_fp16, x = input_153_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2593_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2593_cast_fp16")]; tensor layers_12_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_12_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707061568)))]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_up_proj_weight_to_fp16, x = input_153_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_157_cast_fp16 = mul(x = var_2593_cast_fp16, y = linear_89_cast_fp16)[name = string("input_157_cast_fp16")]; tensor layers_12_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_12_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713353088)))]; tensor linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_mlp_down_proj_weight_to_fp16, x = input_157_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor hidden_states_599_cast_fp16 = add(x = hidden_states_589_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_599_cast_fp16")]; int32 var_2610 = const()[name = string("op_2610"), val = int32(2)]; int32 var_2611 = const()[name = string("op_2611"), val = int32(-1)]; fp16 var_2610_promoted_to_fp16 = const()[name = string("op_2610_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2620_cast_fp16 = pow(x = hidden_states_599_cast_fp16, y = var_2610_promoted_to_fp16)[name = string("op_2620_cast_fp16")]; tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_2620_cast_fp16)[name = string("variance_105_cast_fp16")]; fp16 var_2623_to_fp16 = const()[name = string("op_2623_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2624_cast_fp16 = add(x = variance_105_cast_fp16, y = var_2623_to_fp16)[name = string("op_2624_cast_fp16")]; fp32 var_2625_epsilon_0 = const()[name = string("op_2625_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2625_cast_fp16 = rsqrt(epsilon = var_2625_epsilon_0, x = var_2624_cast_fp16)[name = string("op_2625_cast_fp16")]; tensor hidden_states_603_cast_fp16 = mul(x = hidden_states_599_cast_fp16, y = var_2625_cast_fp16)[name = string("hidden_states_603_cast_fp16")]; tensor layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719644608)))]; tensor hidden_states_607_cast_fp16 = mul(x = layers_13_input_layernorm_weight_to_fp16, y = hidden_states_603_cast_fp16)[name = string("hidden_states_607_cast_fp16")]; tensor var_2638_shape_cast_fp16 = shape(x = hidden_states_607_cast_fp16)[name = string("op_2638_shape_cast_fp16")]; int32 gather_184 = const()[name = string("gather_184"), val = int32(1)]; int32 gather_185_axis_0 = const()[name = string("gather_185_axis_0"), val = int32(0)]; int32 gather_185_batch_dims_0 = const()[name = string("gather_185_batch_dims_0"), val = int32(0)]; bool gather_185_validate_indices_0 = const()[name = string("gather_185_validate_indices_0"), val = bool(false)]; string var_2638_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2638_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_185_indices_0_to_uint16 = const()[name = string("gather_185_indices_0_to_uint16"), val = uint16(1)]; tensor var_2638_shape_cast_fp16_to_uint16 = cast(dtype = var_2638_shape_cast_fp16_to_uint16_dtype_0, x = var_2638_shape_cast_fp16)[name = string("cast_315")]; uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = gather_185_indices_0_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_2638_shape_cast_fp16_to_uint16)[name = string("gather_185_cast_uint16")]; string gather_185_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_185_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_13_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_13_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719646720)))]; tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_13_self_attn_q_proj_weight_to_fp16, x = hidden_states_607_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor concat_106x = const()[name = string("concat_106x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_609_cast_fp16 = reshape(shape = concat_106x, x = linear_91_cast_fp16)[name = string("hidden_states_609_cast_fp16")]; fp16 var_2610_promoted_1_to_fp16 = const()[name = string("op_2610_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_2646_cast_fp16 = pow(x = hidden_states_609_cast_fp16, y = var_2610_promoted_1_to_fp16)[name = string("op_2646_cast_fp16")]; tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([-1])]; bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; tensor variance_107_cast_fp16 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = var_2646_cast_fp16)[name = string("variance_107_cast_fp16")]; fp16 var_2649_to_fp16 = const()[name = string("op_2649_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2650_cast_fp16 = add(x = variance_107_cast_fp16, y = var_2649_to_fp16)[name = string("op_2650_cast_fp16")]; fp32 var_2651_epsilon_0 = const()[name = string("op_2651_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2651_cast_fp16 = rsqrt(epsilon = var_2651_epsilon_0, x = var_2650_cast_fp16)[name = string("op_2651_cast_fp16")]; tensor hidden_states_613_cast_fp16 = mul(x = hidden_states_609_cast_fp16, y = var_2651_cast_fp16)[name = string("hidden_states_613_cast_fp16")]; tensor layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723841088)))]; tensor var_2654_cast_fp16 = mul(x = layers_13_self_attn_q_norm_weight_to_fp16, y = hidden_states_613_cast_fp16)[name = string("op_2654_cast_fp16")]; tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_13_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_13_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(723841408)))]; tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_k_proj_weight_to_fp16, x = hidden_states_607_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor concat_107x = const()[name = string("concat_107x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_617_cast_fp16 = reshape(shape = concat_107x, x = linear_92_cast_fp16)[name = string("hidden_states_617_cast_fp16")]; fp16 var_2610_promoted_2_to_fp16 = const()[name = string("op_2610_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_2662_cast_fp16 = pow(x = hidden_states_617_cast_fp16, y = var_2610_promoted_2_to_fp16)[name = string("op_2662_cast_fp16")]; tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([-1])]; bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; tensor variance_109_cast_fp16 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = var_2662_cast_fp16)[name = string("variance_109_cast_fp16")]; fp16 var_2665_to_fp16 = const()[name = string("op_2665_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2666_cast_fp16 = add(x = variance_109_cast_fp16, y = var_2665_to_fp16)[name = string("op_2666_cast_fp16")]; fp32 var_2667_epsilon_0 = const()[name = string("op_2667_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2667_cast_fp16 = rsqrt(epsilon = var_2667_epsilon_0, x = var_2666_cast_fp16)[name = string("op_2667_cast_fp16")]; tensor hidden_states_621_cast_fp16 = mul(x = hidden_states_617_cast_fp16, y = var_2667_cast_fp16)[name = string("hidden_states_621_cast_fp16")]; tensor layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725938624)))]; tensor var_2670_cast_fp16 = mul(x = layers_13_self_attn_k_norm_weight_to_fp16, y = hidden_states_621_cast_fp16)[name = string("op_2670_cast_fp16")]; tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_13_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_13_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725938944)))]; tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_v_proj_weight_to_fp16, x = hidden_states_607_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor concat_108x = const()[name = string("concat_108x"), val = tensor([1, -1, 8, 128])]; tensor var_2675_cast_fp16 = reshape(shape = concat_108x, x = linear_93_cast_fp16)[name = string("op_2675_cast_fp16")]; tensor hidden_states_629_perm_0 = const()[name = string("hidden_states_629_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2654_cast_fp16)[name = string("transpose_59")]; tensor var_2679_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2679_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2690_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2690_cast_fp16")]; bool var_2692_interleave_0 = const()[name = string("op_2692_interleave_0"), val = bool(false)]; tensor var_2692_cast_fp16 = concat(axis = var_2611, interleave = var_2692_interleave_0, values = (var_2690_cast_fp16, x1_53_cast_fp16))[name = string("op_2692_cast_fp16")]; tensor var_2693_cast_fp16 = mul(x = var_2692_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2693_cast_fp16")]; tensor query_27_cast_fp16 = add(x = var_2679_cast_fp16, y = var_2693_cast_fp16)[name = string("query_27_cast_fp16")]; tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2670_cast_fp16)[name = string("transpose_58")]; tensor var_2695_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2695_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2706_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2706_cast_fp16")]; bool var_2708_interleave_0 = const()[name = string("op_2708_interleave_0"), val = bool(false)]; tensor var_2708_cast_fp16 = concat(axis = var_2611, interleave = var_2708_interleave_0, values = (var_2706_cast_fp16, x1_55_cast_fp16))[name = string("op_2708_cast_fp16")]; tensor var_2709_cast_fp16 = mul(x = var_2708_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2709_cast_fp16")]; tensor hidden_states_625_cast_fp16 = add(x = var_2695_cast_fp16, y = var_2709_cast_fp16)[name = string("hidden_states_625_cast_fp16")]; tensor var_2711_shape_cast_fp16 = shape(x = hidden_states_625_cast_fp16)[name = string("op_2711_shape_cast_fp16")]; int32 gather_190 = const()[name = string("gather_190"), val = int32(1)]; int32 gather_191 = const()[name = string("gather_191"), val = int32(8)]; int32 gather_192_axis_0 = const()[name = string("gather_192_axis_0"), val = int32(0)]; int32 gather_192_batch_dims_0 = const()[name = string("gather_192_batch_dims_0"), val = int32(0)]; bool gather_192_validate_indices_0 = const()[name = string("gather_192_validate_indices_0"), val = bool(false)]; string var_2711_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2711_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_192_indices_0_to_uint16 = const()[name = string("gather_192_indices_0_to_uint16"), val = uint16(2)]; tensor var_2711_shape_cast_fp16_to_uint16 = cast(dtype = var_2711_shape_cast_fp16_to_uint16_dtype_0, x = var_2711_shape_cast_fp16)[name = string("cast_313")]; uint16 gather_192_cast_uint16 = gather(axis = gather_192_axis_0, batch_dims = gather_192_batch_dims_0, indices = gather_192_indices_0_to_uint16, validate_indices = gather_192_validate_indices_0, x = var_2711_shape_cast_fp16_to_uint16)[name = string("gather_192_cast_uint16")]; string gather_192_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_192_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_193 = const()[name = string("gather_193"), val = int32(128)]; tensor var_2718_axes_0 = const()[name = string("op_2718_axes_0"), val = tensor([2])]; tensor var_2718_cast_fp16 = expand_dims(axes = var_2718_axes_0, x = hidden_states_625_cast_fp16)[name = string("op_2718_cast_fp16")]; int32 concat_109_axis_0 = const()[name = string("concat_109_axis_0"), val = int32(0)]; bool concat_109_interleave_0 = const()[name = string("concat_109_interleave_0"), val = bool(false)]; int32 gather_192_cast_uint16_to_int32 = cast(dtype = gather_192_cast_uint16_to_int32_dtype_0, x = gather_192_cast_uint16)[name = string("cast_312")]; tensor concat_109 = concat(axis = concat_109_axis_0, interleave = concat_109_interleave_0, values = (gather_190, gather_191, var_2610, gather_192_cast_uint16_to_int32, gather_193))[name = string("concat_109")]; tensor shape_26_cast_fp16 = shape(x = var_2718_cast_fp16)[name = string("shape_26_cast_fp16")]; int32 equal_26_y_0 = const()[name = string("equal_26_y_0"), val = int32(-1)]; tensor equal_26 = equal(x = concat_109, y = equal_26_y_0)[name = string("equal_26")]; tensor select_26 = select(a = shape_26_cast_fp16, b = concat_109, cond = equal_26)[name = string("select_26")]; tensor real_div_26 = real_div(x = select_26, y = shape_26_cast_fp16)[name = string("real_div_26")]; tensor hidden_states_627_cast_fp16 = tile(reps = real_div_26, x = var_2718_cast_fp16)[name = string("hidden_states_627_cast_fp16")]; tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, 16, -1, 128])]; tensor key_states_27_cast_fp16 = reshape(shape = concat_110x, x = hidden_states_627_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor hidden_states_629_cast_fp16 = transpose(perm = hidden_states_629_perm_0, x = var_2675_cast_fp16)[name = string("transpose_57")]; tensor var_2728_shape_cast_fp16 = shape(x = hidden_states_629_cast_fp16)[name = string("op_2728_shape_cast_fp16")]; int32 gather_194 = const()[name = string("gather_194"), val = int32(1)]; int32 gather_195 = const()[name = string("gather_195"), val = int32(8)]; int32 gather_196_axis_0 = const()[name = string("gather_196_axis_0"), val = int32(0)]; int32 gather_196_batch_dims_0 = const()[name = string("gather_196_batch_dims_0"), val = int32(0)]; bool gather_196_validate_indices_0 = const()[name = string("gather_196_validate_indices_0"), val = bool(false)]; string var_2728_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2728_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_196_indices_0_to_uint16 = const()[name = string("gather_196_indices_0_to_uint16"), val = uint16(2)]; tensor var_2728_shape_cast_fp16_to_uint16 = cast(dtype = var_2728_shape_cast_fp16_to_uint16_dtype_0, x = var_2728_shape_cast_fp16)[name = string("cast_311")]; uint16 gather_196_cast_uint16 = gather(axis = gather_196_axis_0, batch_dims = gather_196_batch_dims_0, indices = gather_196_indices_0_to_uint16, validate_indices = gather_196_validate_indices_0, x = var_2728_shape_cast_fp16_to_uint16)[name = string("gather_196_cast_uint16")]; string gather_196_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_196_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_197 = const()[name = string("gather_197"), val = int32(128)]; tensor var_2735_axes_0 = const()[name = string("op_2735_axes_0"), val = tensor([2])]; tensor var_2735_cast_fp16 = expand_dims(axes = var_2735_axes_0, x = hidden_states_629_cast_fp16)[name = string("op_2735_cast_fp16")]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; int32 gather_196_cast_uint16_to_int32 = cast(dtype = gather_196_cast_uint16_to_int32_dtype_0, x = gather_196_cast_uint16)[name = string("cast_310")]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (gather_194, gather_195, var_2610, gather_196_cast_uint16_to_int32, gather_197))[name = string("concat_111")]; tensor shape_27_cast_fp16 = shape(x = var_2735_cast_fp16)[name = string("shape_27_cast_fp16")]; int32 equal_27_y_0 = const()[name = string("equal_27_y_0"), val = int32(-1)]; tensor equal_27 = equal(x = concat_111, y = equal_27_y_0)[name = string("equal_27")]; tensor select_27 = select(a = shape_27_cast_fp16, b = concat_111, cond = equal_27)[name = string("select_27")]; tensor real_div_27 = real_div(x = select_27, y = shape_27_cast_fp16)[name = string("real_div_27")]; tensor hidden_states_631_cast_fp16 = tile(reps = real_div_27, x = var_2735_cast_fp16)[name = string("hidden_states_631_cast_fp16")]; tensor concat_112x = const()[name = string("concat_112x"), val = tensor([1, 16, -1, 128])]; tensor value_states_27_cast_fp16 = reshape(shape = concat_112x, x = hidden_states_631_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_2746_transpose_x_1 = const()[name = string("op_2746_transpose_x_1"), val = bool(false)]; bool var_2746_transpose_y_1 = const()[name = string("op_2746_transpose_y_1"), val = bool(true)]; tensor var_2746_cast_fp16 = matmul(transpose_x = var_2746_transpose_x_1, transpose_y = var_2746_transpose_y_1, x = query_27_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_2746_cast_fp16")]; fp16 var_2747_to_fp16 = const()[name = string("op_2747_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_53_cast_fp16 = mul(x = var_2746_cast_fp16, y = var_2747_to_fp16)[name = string("attn_weights_53_cast_fp16")]; tensor input_159_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_159_cast_fp16")]; tensor var_2750_cast_fp16 = softmax(axis = var_2611, x = input_159_cast_fp16)[name = string("op_2750_cast_fp16")]; bool attn_output_53_transpose_x_0 = const()[name = string("attn_output_53_transpose_x_0"), val = bool(false)]; bool attn_output_53_transpose_y_0 = const()[name = string("attn_output_53_transpose_y_0"), val = bool(false)]; tensor attn_output_53_cast_fp16 = matmul(transpose_x = attn_output_53_transpose_x_0, transpose_y = attn_output_53_transpose_y_0, x = var_2750_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_53_cast_fp16")]; tensor var_2754_perm_0 = const()[name = string("op_2754_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_113_axis_0 = const()[name = string("concat_113_axis_0"), val = int32(0)]; bool concat_113_interleave_0 = const()[name = string("concat_113_interleave_0"), val = bool(false)]; int32 gather_185_cast_uint16_to_int32 = cast(dtype = gather_185_cast_uint16_to_int32_dtype_0, x = gather_185_cast_uint16)[name = string("cast_314")]; tensor concat_113 = concat(axis = concat_113_axis_0, interleave = concat_113_interleave_0, values = (gather_184, gather_185_cast_uint16_to_int32, var_2611))[name = string("concat_113")]; tensor var_2754_cast_fp16 = transpose(perm = var_2754_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_56")]; tensor var_2757_cast_fp16 = reshape(shape = concat_113, x = var_2754_cast_fp16)[name = string("op_2757_cast_fp16")]; tensor layers_13_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_13_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(728036160)))]; tensor linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_o_proj_weight_to_fp16, x = var_2757_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor hidden_states_635_cast_fp16 = add(x = hidden_states_599_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_635_cast_fp16")]; fp16 var_2610_promoted_3_to_fp16 = const()[name = string("op_2610_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2764_cast_fp16 = pow(x = hidden_states_635_cast_fp16, y = var_2610_promoted_3_to_fp16)[name = string("op_2764_cast_fp16")]; tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_2764_cast_fp16)[name = string("variance_111_cast_fp16")]; fp16 var_2767_to_fp16 = const()[name = string("op_2767_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2768_cast_fp16 = add(x = variance_111_cast_fp16, y = var_2767_to_fp16)[name = string("op_2768_cast_fp16")]; fp32 var_2769_epsilon_0 = const()[name = string("op_2769_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2769_cast_fp16 = rsqrt(epsilon = var_2769_epsilon_0, x = var_2768_cast_fp16)[name = string("op_2769_cast_fp16")]; tensor hidden_states_639_cast_fp16 = mul(x = hidden_states_635_cast_fp16, y = var_2769_cast_fp16)[name = string("hidden_states_639_cast_fp16")]; tensor layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732230528)))]; tensor input_165_cast_fp16 = mul(x = layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_639_cast_fp16)[name = string("input_165_cast_fp16")]; tensor layers_13_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_13_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732232640)))]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_gate_proj_weight_to_fp16, x = input_165_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_2781_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2781_cast_fp16")]; tensor layers_13_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_13_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738524160)))]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_up_proj_weight_to_fp16, x = input_165_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_169_cast_fp16 = mul(x = var_2781_cast_fp16, y = linear_96_cast_fp16)[name = string("input_169_cast_fp16")]; tensor layers_13_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_13_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744815680)))]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_mlp_down_proj_weight_to_fp16, x = input_169_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor hidden_states_645_cast_fp16 = add(x = hidden_states_635_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_645_cast_fp16")]; int32 var_2798 = const()[name = string("op_2798"), val = int32(2)]; int32 var_2799 = const()[name = string("op_2799"), val = int32(-1)]; fp16 var_2798_promoted_to_fp16 = const()[name = string("op_2798_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2808_cast_fp16 = pow(x = hidden_states_645_cast_fp16, y = var_2798_promoted_to_fp16)[name = string("op_2808_cast_fp16")]; tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_2808_cast_fp16)[name = string("variance_113_cast_fp16")]; fp16 var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2812_cast_fp16 = add(x = variance_113_cast_fp16, y = var_2811_to_fp16)[name = string("op_2812_cast_fp16")]; fp32 var_2813_epsilon_0 = const()[name = string("op_2813_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2813_cast_fp16 = rsqrt(epsilon = var_2813_epsilon_0, x = var_2812_cast_fp16)[name = string("op_2813_cast_fp16")]; tensor hidden_states_649_cast_fp16 = mul(x = hidden_states_645_cast_fp16, y = var_2813_cast_fp16)[name = string("hidden_states_649_cast_fp16")]; tensor layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751107200)))]; tensor hidden_states_653_cast_fp16 = mul(x = layers_14_input_layernorm_weight_to_fp16, y = hidden_states_649_cast_fp16)[name = string("hidden_states_653_cast_fp16")]; tensor var_2826_shape_cast_fp16 = shape(x = hidden_states_653_cast_fp16)[name = string("op_2826_shape_cast_fp16")]; int32 gather_198 = const()[name = string("gather_198"), val = int32(1)]; int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; string var_2826_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2826_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_199_indices_0_to_uint16 = const()[name = string("gather_199_indices_0_to_uint16"), val = uint16(1)]; tensor var_2826_shape_cast_fp16_to_uint16 = cast(dtype = var_2826_shape_cast_fp16_to_uint16_dtype_0, x = var_2826_shape_cast_fp16)[name = string("cast_309")]; uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = gather_199_indices_0_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_2826_shape_cast_fp16_to_uint16)[name = string("gather_199_cast_uint16")]; string gather_199_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_199_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_14_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_14_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751109312)))]; tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_14_self_attn_q_proj_weight_to_fp16, x = hidden_states_653_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor concat_114x = const()[name = string("concat_114x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_655_cast_fp16 = reshape(shape = concat_114x, x = linear_98_cast_fp16)[name = string("hidden_states_655_cast_fp16")]; fp16 var_2798_promoted_1_to_fp16 = const()[name = string("op_2798_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_2834_cast_fp16 = pow(x = hidden_states_655_cast_fp16, y = var_2798_promoted_1_to_fp16)[name = string("op_2834_cast_fp16")]; tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([-1])]; bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; tensor variance_115_cast_fp16 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = var_2834_cast_fp16)[name = string("variance_115_cast_fp16")]; fp16 var_2837_to_fp16 = const()[name = string("op_2837_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2838_cast_fp16 = add(x = variance_115_cast_fp16, y = var_2837_to_fp16)[name = string("op_2838_cast_fp16")]; fp32 var_2839_epsilon_0 = const()[name = string("op_2839_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2839_cast_fp16 = rsqrt(epsilon = var_2839_epsilon_0, x = var_2838_cast_fp16)[name = string("op_2839_cast_fp16")]; tensor hidden_states_659_cast_fp16 = mul(x = hidden_states_655_cast_fp16, y = var_2839_cast_fp16)[name = string("hidden_states_659_cast_fp16")]; tensor layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755303680)))]; tensor var_2842_cast_fp16 = mul(x = layers_14_self_attn_q_norm_weight_to_fp16, y = hidden_states_659_cast_fp16)[name = string("op_2842_cast_fp16")]; tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_14_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_14_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755304000)))]; tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_k_proj_weight_to_fp16, x = hidden_states_653_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_663_cast_fp16 = reshape(shape = concat_115x, x = linear_99_cast_fp16)[name = string("hidden_states_663_cast_fp16")]; fp16 var_2798_promoted_2_to_fp16 = const()[name = string("op_2798_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_2850_cast_fp16 = pow(x = hidden_states_663_cast_fp16, y = var_2798_promoted_2_to_fp16)[name = string("op_2850_cast_fp16")]; tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([-1])]; bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; tensor variance_117_cast_fp16 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = var_2850_cast_fp16)[name = string("variance_117_cast_fp16")]; fp16 var_2853_to_fp16 = const()[name = string("op_2853_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2854_cast_fp16 = add(x = variance_117_cast_fp16, y = var_2853_to_fp16)[name = string("op_2854_cast_fp16")]; fp32 var_2855_epsilon_0 = const()[name = string("op_2855_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2855_cast_fp16 = rsqrt(epsilon = var_2855_epsilon_0, x = var_2854_cast_fp16)[name = string("op_2855_cast_fp16")]; tensor hidden_states_667_cast_fp16 = mul(x = hidden_states_663_cast_fp16, y = var_2855_cast_fp16)[name = string("hidden_states_667_cast_fp16")]; tensor layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757401216)))]; tensor var_2858_cast_fp16 = mul(x = layers_14_self_attn_k_norm_weight_to_fp16, y = hidden_states_667_cast_fp16)[name = string("op_2858_cast_fp16")]; tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_14_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_14_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757401536)))]; tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_v_proj_weight_to_fp16, x = hidden_states_653_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 8, 128])]; tensor var_2863_cast_fp16 = reshape(shape = concat_116x, x = linear_100_cast_fp16)[name = string("op_2863_cast_fp16")]; tensor hidden_states_675_perm_0 = const()[name = string("hidden_states_675_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_2842_cast_fp16)[name = string("transpose_55")]; tensor var_2867_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2867_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2878_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2878_cast_fp16")]; bool var_2880_interleave_0 = const()[name = string("op_2880_interleave_0"), val = bool(false)]; tensor var_2880_cast_fp16 = concat(axis = var_2799, interleave = var_2880_interleave_0, values = (var_2878_cast_fp16, x1_57_cast_fp16))[name = string("op_2880_cast_fp16")]; tensor var_2881_cast_fp16 = mul(x = var_2880_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2881_cast_fp16")]; tensor query_29_cast_fp16 = add(x = var_2867_cast_fp16, y = var_2881_cast_fp16)[name = string("query_29_cast_fp16")]; tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_2858_cast_fp16)[name = string("transpose_54")]; tensor var_2883_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2883_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2894_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2894_cast_fp16")]; bool var_2896_interleave_0 = const()[name = string("op_2896_interleave_0"), val = bool(false)]; tensor var_2896_cast_fp16 = concat(axis = var_2799, interleave = var_2896_interleave_0, values = (var_2894_cast_fp16, x1_59_cast_fp16))[name = string("op_2896_cast_fp16")]; tensor var_2897_cast_fp16 = mul(x = var_2896_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2897_cast_fp16")]; tensor hidden_states_671_cast_fp16 = add(x = var_2883_cast_fp16, y = var_2897_cast_fp16)[name = string("hidden_states_671_cast_fp16")]; tensor var_2899_shape_cast_fp16 = shape(x = hidden_states_671_cast_fp16)[name = string("op_2899_shape_cast_fp16")]; int32 gather_204 = const()[name = string("gather_204"), val = int32(1)]; int32 gather_205 = const()[name = string("gather_205"), val = int32(8)]; int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)]; int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)]; bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)]; string var_2899_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2899_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_206_indices_0_to_uint16 = const()[name = string("gather_206_indices_0_to_uint16"), val = uint16(2)]; tensor var_2899_shape_cast_fp16_to_uint16 = cast(dtype = var_2899_shape_cast_fp16_to_uint16_dtype_0, x = var_2899_shape_cast_fp16)[name = string("cast_307")]; uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = gather_206_indices_0_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_2899_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")]; string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_207 = const()[name = string("gather_207"), val = int32(128)]; tensor var_2906_axes_0 = const()[name = string("op_2906_axes_0"), val = tensor([2])]; tensor var_2906_cast_fp16 = expand_dims(axes = var_2906_axes_0, x = hidden_states_671_cast_fp16)[name = string("op_2906_cast_fp16")]; int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)]; bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)]; int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_306")]; tensor concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (gather_204, gather_205, var_2798, gather_206_cast_uint16_to_int32, gather_207))[name = string("concat_117")]; tensor shape_28_cast_fp16 = shape(x = var_2906_cast_fp16)[name = string("shape_28_cast_fp16")]; int32 equal_28_y_0 = const()[name = string("equal_28_y_0"), val = int32(-1)]; tensor equal_28 = equal(x = concat_117, y = equal_28_y_0)[name = string("equal_28")]; tensor select_28 = select(a = shape_28_cast_fp16, b = concat_117, cond = equal_28)[name = string("select_28")]; tensor real_div_28 = real_div(x = select_28, y = shape_28_cast_fp16)[name = string("real_div_28")]; tensor hidden_states_673_cast_fp16 = tile(reps = real_div_28, x = var_2906_cast_fp16)[name = string("hidden_states_673_cast_fp16")]; tensor concat_118x = const()[name = string("concat_118x"), val = tensor([1, 16, -1, 128])]; tensor key_states_29_cast_fp16 = reshape(shape = concat_118x, x = hidden_states_673_cast_fp16)[name = string("key_states_29_cast_fp16")]; tensor hidden_states_675_cast_fp16 = transpose(perm = hidden_states_675_perm_0, x = var_2863_cast_fp16)[name = string("transpose_53")]; tensor var_2916_shape_cast_fp16 = shape(x = hidden_states_675_cast_fp16)[name = string("op_2916_shape_cast_fp16")]; int32 gather_208 = const()[name = string("gather_208"), val = int32(1)]; int32 gather_209 = const()[name = string("gather_209"), val = int32(8)]; int32 gather_210_axis_0 = const()[name = string("gather_210_axis_0"), val = int32(0)]; int32 gather_210_batch_dims_0 = const()[name = string("gather_210_batch_dims_0"), val = int32(0)]; bool gather_210_validate_indices_0 = const()[name = string("gather_210_validate_indices_0"), val = bool(false)]; string var_2916_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2916_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_210_indices_0_to_uint16 = const()[name = string("gather_210_indices_0_to_uint16"), val = uint16(2)]; tensor var_2916_shape_cast_fp16_to_uint16 = cast(dtype = var_2916_shape_cast_fp16_to_uint16_dtype_0, x = var_2916_shape_cast_fp16)[name = string("cast_305")]; uint16 gather_210_cast_uint16 = gather(axis = gather_210_axis_0, batch_dims = gather_210_batch_dims_0, indices = gather_210_indices_0_to_uint16, validate_indices = gather_210_validate_indices_0, x = var_2916_shape_cast_fp16_to_uint16)[name = string("gather_210_cast_uint16")]; string gather_210_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_210_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_211 = const()[name = string("gather_211"), val = int32(128)]; tensor var_2923_axes_0 = const()[name = string("op_2923_axes_0"), val = tensor([2])]; tensor var_2923_cast_fp16 = expand_dims(axes = var_2923_axes_0, x = hidden_states_675_cast_fp16)[name = string("op_2923_cast_fp16")]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; int32 gather_210_cast_uint16_to_int32 = cast(dtype = gather_210_cast_uint16_to_int32_dtype_0, x = gather_210_cast_uint16)[name = string("cast_304")]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (gather_208, gather_209, var_2798, gather_210_cast_uint16_to_int32, gather_211))[name = string("concat_119")]; tensor shape_29_cast_fp16 = shape(x = var_2923_cast_fp16)[name = string("shape_29_cast_fp16")]; int32 equal_29_y_0 = const()[name = string("equal_29_y_0"), val = int32(-1)]; tensor equal_29 = equal(x = concat_119, y = equal_29_y_0)[name = string("equal_29")]; tensor select_29 = select(a = shape_29_cast_fp16, b = concat_119, cond = equal_29)[name = string("select_29")]; tensor real_div_29 = real_div(x = select_29, y = shape_29_cast_fp16)[name = string("real_div_29")]; tensor hidden_states_677_cast_fp16 = tile(reps = real_div_29, x = var_2923_cast_fp16)[name = string("hidden_states_677_cast_fp16")]; tensor concat_120x = const()[name = string("concat_120x"), val = tensor([1, 16, -1, 128])]; tensor value_states_29_cast_fp16 = reshape(shape = concat_120x, x = hidden_states_677_cast_fp16)[name = string("value_states_29_cast_fp16")]; bool var_2934_transpose_x_1 = const()[name = string("op_2934_transpose_x_1"), val = bool(false)]; bool var_2934_transpose_y_1 = const()[name = string("op_2934_transpose_y_1"), val = bool(true)]; tensor var_2934_cast_fp16 = matmul(transpose_x = var_2934_transpose_x_1, transpose_y = var_2934_transpose_y_1, x = query_29_cast_fp16, y = key_states_29_cast_fp16)[name = string("op_2934_cast_fp16")]; fp16 var_2935_to_fp16 = const()[name = string("op_2935_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_57_cast_fp16 = mul(x = var_2934_cast_fp16, y = var_2935_to_fp16)[name = string("attn_weights_57_cast_fp16")]; tensor input_171_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_171_cast_fp16")]; tensor var_2938_cast_fp16 = softmax(axis = var_2799, x = input_171_cast_fp16)[name = string("op_2938_cast_fp16")]; bool attn_output_57_transpose_x_0 = const()[name = string("attn_output_57_transpose_x_0"), val = bool(false)]; bool attn_output_57_transpose_y_0 = const()[name = string("attn_output_57_transpose_y_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = matmul(transpose_x = attn_output_57_transpose_x_0, transpose_y = attn_output_57_transpose_y_0, x = var_2938_cast_fp16, y = value_states_29_cast_fp16)[name = string("attn_output_57_cast_fp16")]; tensor var_2942_perm_0 = const()[name = string("op_2942_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_121_axis_0 = const()[name = string("concat_121_axis_0"), val = int32(0)]; bool concat_121_interleave_0 = const()[name = string("concat_121_interleave_0"), val = bool(false)]; int32 gather_199_cast_uint16_to_int32 = cast(dtype = gather_199_cast_uint16_to_int32_dtype_0, x = gather_199_cast_uint16)[name = string("cast_308")]; tensor concat_121 = concat(axis = concat_121_axis_0, interleave = concat_121_interleave_0, values = (gather_198, gather_199_cast_uint16_to_int32, var_2799))[name = string("concat_121")]; tensor var_2942_cast_fp16 = transpose(perm = var_2942_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_52")]; tensor var_2945_cast_fp16 = reshape(shape = concat_121, x = var_2942_cast_fp16)[name = string("op_2945_cast_fp16")]; tensor layers_14_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_14_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759498752)))]; tensor linear_101_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_o_proj_weight_to_fp16, x = var_2945_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor hidden_states_681_cast_fp16 = add(x = hidden_states_645_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_681_cast_fp16")]; fp16 var_2798_promoted_3_to_fp16 = const()[name = string("op_2798_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2952_cast_fp16 = pow(x = hidden_states_681_cast_fp16, y = var_2798_promoted_3_to_fp16)[name = string("op_2952_cast_fp16")]; tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_2952_cast_fp16)[name = string("variance_119_cast_fp16")]; fp16 var_2955_to_fp16 = const()[name = string("op_2955_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2956_cast_fp16 = add(x = variance_119_cast_fp16, y = var_2955_to_fp16)[name = string("op_2956_cast_fp16")]; fp32 var_2957_epsilon_0 = const()[name = string("op_2957_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2957_cast_fp16 = rsqrt(epsilon = var_2957_epsilon_0, x = var_2956_cast_fp16)[name = string("op_2957_cast_fp16")]; tensor hidden_states_685_cast_fp16 = mul(x = hidden_states_681_cast_fp16, y = var_2957_cast_fp16)[name = string("hidden_states_685_cast_fp16")]; tensor layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763693120)))]; tensor input_177_cast_fp16 = mul(x = layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_685_cast_fp16)[name = string("input_177_cast_fp16")]; tensor layers_14_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_14_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763695232)))]; tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_gate_proj_weight_to_fp16, x = input_177_cast_fp16)[name = string("linear_102_cast_fp16")]; tensor var_2969_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2969_cast_fp16")]; tensor layers_14_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_14_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769986752)))]; tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_up_proj_weight_to_fp16, x = input_177_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor input_181_cast_fp16 = mul(x = var_2969_cast_fp16, y = linear_103_cast_fp16)[name = string("input_181_cast_fp16")]; tensor layers_14_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_14_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776278272)))]; tensor linear_104_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_mlp_down_proj_weight_to_fp16, x = input_181_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor hidden_states_691_cast_fp16 = add(x = hidden_states_681_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_691_cast_fp16")]; int32 var_2986 = const()[name = string("op_2986"), val = int32(2)]; int32 var_2987 = const()[name = string("op_2987"), val = int32(-1)]; fp16 var_2986_promoted_to_fp16 = const()[name = string("op_2986_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2996_cast_fp16 = pow(x = hidden_states_691_cast_fp16, y = var_2986_promoted_to_fp16)[name = string("op_2996_cast_fp16")]; tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([-1])]; bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; tensor variance_121_cast_fp16 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = var_2996_cast_fp16)[name = string("variance_121_cast_fp16")]; fp16 var_2999_to_fp16 = const()[name = string("op_2999_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3000_cast_fp16 = add(x = variance_121_cast_fp16, y = var_2999_to_fp16)[name = string("op_3000_cast_fp16")]; fp32 var_3001_epsilon_0 = const()[name = string("op_3001_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3001_cast_fp16 = rsqrt(epsilon = var_3001_epsilon_0, x = var_3000_cast_fp16)[name = string("op_3001_cast_fp16")]; tensor hidden_states_695_cast_fp16 = mul(x = hidden_states_691_cast_fp16, y = var_3001_cast_fp16)[name = string("hidden_states_695_cast_fp16")]; tensor layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(782569792)))]; tensor hidden_states_699_cast_fp16 = mul(x = layers_15_input_layernorm_weight_to_fp16, y = hidden_states_695_cast_fp16)[name = string("hidden_states_699_cast_fp16")]; tensor var_3014_shape_cast_fp16 = shape(x = hidden_states_699_cast_fp16)[name = string("op_3014_shape_cast_fp16")]; int32 gather_212 = const()[name = string("gather_212"), val = int32(1)]; int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; string var_3014_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3014_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_213_indices_0_to_uint16 = const()[name = string("gather_213_indices_0_to_uint16"), val = uint16(1)]; tensor var_3014_shape_cast_fp16_to_uint16 = cast(dtype = var_3014_shape_cast_fp16_to_uint16_dtype_0, x = var_3014_shape_cast_fp16)[name = string("cast_303")]; uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = gather_213_indices_0_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_3014_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_15_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_15_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(782571904)))]; tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_15_self_attn_q_proj_weight_to_fp16, x = hidden_states_699_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_701_cast_fp16 = reshape(shape = concat_122x, x = linear_105_cast_fp16)[name = string("hidden_states_701_cast_fp16")]; fp16 var_2986_promoted_1_to_fp16 = const()[name = string("op_2986_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_3022_cast_fp16 = pow(x = hidden_states_701_cast_fp16, y = var_2986_promoted_1_to_fp16)[name = string("op_3022_cast_fp16")]; tensor variance_123_axes_0 = const()[name = string("variance_123_axes_0"), val = tensor([-1])]; bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; tensor variance_123_cast_fp16 = reduce_mean(axes = variance_123_axes_0, keep_dims = variance_123_keep_dims_0, x = var_3022_cast_fp16)[name = string("variance_123_cast_fp16")]; fp16 var_3025_to_fp16 = const()[name = string("op_3025_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3026_cast_fp16 = add(x = variance_123_cast_fp16, y = var_3025_to_fp16)[name = string("op_3026_cast_fp16")]; fp32 var_3027_epsilon_0 = const()[name = string("op_3027_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3027_cast_fp16 = rsqrt(epsilon = var_3027_epsilon_0, x = var_3026_cast_fp16)[name = string("op_3027_cast_fp16")]; tensor hidden_states_705_cast_fp16 = mul(x = hidden_states_701_cast_fp16, y = var_3027_cast_fp16)[name = string("hidden_states_705_cast_fp16")]; tensor layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786766272)))]; tensor var_3030_cast_fp16 = mul(x = layers_15_self_attn_q_norm_weight_to_fp16, y = hidden_states_705_cast_fp16)[name = string("op_3030_cast_fp16")]; tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_15_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_15_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786766592)))]; tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_k_proj_weight_to_fp16, x = hidden_states_699_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_709_cast_fp16 = reshape(shape = concat_123x, x = linear_106_cast_fp16)[name = string("hidden_states_709_cast_fp16")]; fp16 var_2986_promoted_2_to_fp16 = const()[name = string("op_2986_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_3038_cast_fp16 = pow(x = hidden_states_709_cast_fp16, y = var_2986_promoted_2_to_fp16)[name = string("op_3038_cast_fp16")]; tensor variance_125_axes_0 = const()[name = string("variance_125_axes_0"), val = tensor([-1])]; bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; tensor variance_125_cast_fp16 = reduce_mean(axes = variance_125_axes_0, keep_dims = variance_125_keep_dims_0, x = var_3038_cast_fp16)[name = string("variance_125_cast_fp16")]; fp16 var_3041_to_fp16 = const()[name = string("op_3041_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3042_cast_fp16 = add(x = variance_125_cast_fp16, y = var_3041_to_fp16)[name = string("op_3042_cast_fp16")]; fp32 var_3043_epsilon_0 = const()[name = string("op_3043_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3043_cast_fp16 = rsqrt(epsilon = var_3043_epsilon_0, x = var_3042_cast_fp16)[name = string("op_3043_cast_fp16")]; tensor hidden_states_713_cast_fp16 = mul(x = hidden_states_709_cast_fp16, y = var_3043_cast_fp16)[name = string("hidden_states_713_cast_fp16")]; tensor layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788863808)))]; tensor var_3046_cast_fp16 = mul(x = layers_15_self_attn_k_norm_weight_to_fp16, y = hidden_states_713_cast_fp16)[name = string("op_3046_cast_fp16")]; tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_15_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_15_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788864128)))]; tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_v_proj_weight_to_fp16, x = hidden_states_699_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 8, 128])]; tensor var_3051_cast_fp16 = reshape(shape = concat_124x, x = linear_107_cast_fp16)[name = string("op_3051_cast_fp16")]; tensor hidden_states_721_perm_0 = const()[name = string("hidden_states_721_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_31_cast_fp16 = transpose(perm = q_31_perm_0, x = var_3030_cast_fp16)[name = string("transpose_51")]; tensor var_3055_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3055_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3066_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_3066_cast_fp16")]; bool var_3068_interleave_0 = const()[name = string("op_3068_interleave_0"), val = bool(false)]; tensor var_3068_cast_fp16 = concat(axis = var_2987, interleave = var_3068_interleave_0, values = (var_3066_cast_fp16, x1_61_cast_fp16))[name = string("op_3068_cast_fp16")]; tensor var_3069_cast_fp16 = mul(x = var_3068_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3069_cast_fp16")]; tensor query_31_cast_fp16 = add(x = var_3055_cast_fp16, y = var_3069_cast_fp16)[name = string("query_31_cast_fp16")]; tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = var_3046_cast_fp16)[name = string("transpose_50")]; tensor var_3071_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3071_cast_fp16")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3082_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_3082_cast_fp16")]; bool var_3084_interleave_0 = const()[name = string("op_3084_interleave_0"), val = bool(false)]; tensor var_3084_cast_fp16 = concat(axis = var_2987, interleave = var_3084_interleave_0, values = (var_3082_cast_fp16, x1_63_cast_fp16))[name = string("op_3084_cast_fp16")]; tensor var_3085_cast_fp16 = mul(x = var_3084_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3085_cast_fp16")]; tensor hidden_states_717_cast_fp16 = add(x = var_3071_cast_fp16, y = var_3085_cast_fp16)[name = string("hidden_states_717_cast_fp16")]; tensor var_3087_shape_cast_fp16 = shape(x = hidden_states_717_cast_fp16)[name = string("op_3087_shape_cast_fp16")]; int32 gather_218 = const()[name = string("gather_218"), val = int32(1)]; int32 gather_219 = const()[name = string("gather_219"), val = int32(8)]; int32 gather_220_axis_0 = const()[name = string("gather_220_axis_0"), val = int32(0)]; int32 gather_220_batch_dims_0 = const()[name = string("gather_220_batch_dims_0"), val = int32(0)]; bool gather_220_validate_indices_0 = const()[name = string("gather_220_validate_indices_0"), val = bool(false)]; string var_3087_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3087_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_220_indices_0_to_uint16 = const()[name = string("gather_220_indices_0_to_uint16"), val = uint16(2)]; tensor var_3087_shape_cast_fp16_to_uint16 = cast(dtype = var_3087_shape_cast_fp16_to_uint16_dtype_0, x = var_3087_shape_cast_fp16)[name = string("cast_301")]; uint16 gather_220_cast_uint16 = gather(axis = gather_220_axis_0, batch_dims = gather_220_batch_dims_0, indices = gather_220_indices_0_to_uint16, validate_indices = gather_220_validate_indices_0, x = var_3087_shape_cast_fp16_to_uint16)[name = string("gather_220_cast_uint16")]; string gather_220_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_220_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_221 = const()[name = string("gather_221"), val = int32(128)]; tensor var_3094_axes_0 = const()[name = string("op_3094_axes_0"), val = tensor([2])]; tensor var_3094_cast_fp16 = expand_dims(axes = var_3094_axes_0, x = hidden_states_717_cast_fp16)[name = string("op_3094_cast_fp16")]; int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; int32 gather_220_cast_uint16_to_int32 = cast(dtype = gather_220_cast_uint16_to_int32_dtype_0, x = gather_220_cast_uint16)[name = string("cast_300")]; tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_218, gather_219, var_2986, gather_220_cast_uint16_to_int32, gather_221))[name = string("concat_125")]; tensor shape_30_cast_fp16 = shape(x = var_3094_cast_fp16)[name = string("shape_30_cast_fp16")]; int32 equal_30_y_0 = const()[name = string("equal_30_y_0"), val = int32(-1)]; tensor equal_30 = equal(x = concat_125, y = equal_30_y_0)[name = string("equal_30")]; tensor select_30 = select(a = shape_30_cast_fp16, b = concat_125, cond = equal_30)[name = string("select_30")]; tensor real_div_30 = real_div(x = select_30, y = shape_30_cast_fp16)[name = string("real_div_30")]; tensor hidden_states_719_cast_fp16 = tile(reps = real_div_30, x = var_3094_cast_fp16)[name = string("hidden_states_719_cast_fp16")]; tensor concat_126x = const()[name = string("concat_126x"), val = tensor([1, 16, -1, 128])]; tensor key_states_31_cast_fp16 = reshape(shape = concat_126x, x = hidden_states_719_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor hidden_states_721_cast_fp16 = transpose(perm = hidden_states_721_perm_0, x = var_3051_cast_fp16)[name = string("transpose_49")]; tensor var_3104_shape_cast_fp16 = shape(x = hidden_states_721_cast_fp16)[name = string("op_3104_shape_cast_fp16")]; int32 gather_222 = const()[name = string("gather_222"), val = int32(1)]; int32 gather_223 = const()[name = string("gather_223"), val = int32(8)]; int32 gather_224_axis_0 = const()[name = string("gather_224_axis_0"), val = int32(0)]; int32 gather_224_batch_dims_0 = const()[name = string("gather_224_batch_dims_0"), val = int32(0)]; bool gather_224_validate_indices_0 = const()[name = string("gather_224_validate_indices_0"), val = bool(false)]; string var_3104_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3104_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_224_indices_0_to_uint16 = const()[name = string("gather_224_indices_0_to_uint16"), val = uint16(2)]; tensor var_3104_shape_cast_fp16_to_uint16 = cast(dtype = var_3104_shape_cast_fp16_to_uint16_dtype_0, x = var_3104_shape_cast_fp16)[name = string("cast_299")]; uint16 gather_224_cast_uint16 = gather(axis = gather_224_axis_0, batch_dims = gather_224_batch_dims_0, indices = gather_224_indices_0_to_uint16, validate_indices = gather_224_validate_indices_0, x = var_3104_shape_cast_fp16_to_uint16)[name = string("gather_224_cast_uint16")]; string gather_224_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_224_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_225 = const()[name = string("gather_225"), val = int32(128)]; tensor var_3111_axes_0 = const()[name = string("op_3111_axes_0"), val = tensor([2])]; tensor var_3111_cast_fp16 = expand_dims(axes = var_3111_axes_0, x = hidden_states_721_cast_fp16)[name = string("op_3111_cast_fp16")]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; int32 gather_224_cast_uint16_to_int32 = cast(dtype = gather_224_cast_uint16_to_int32_dtype_0, x = gather_224_cast_uint16)[name = string("cast_298")]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_222, gather_223, var_2986, gather_224_cast_uint16_to_int32, gather_225))[name = string("concat_127")]; tensor shape_31_cast_fp16 = shape(x = var_3111_cast_fp16)[name = string("shape_31_cast_fp16")]; int32 equal_31_y_0 = const()[name = string("equal_31_y_0"), val = int32(-1)]; tensor equal_31 = equal(x = concat_127, y = equal_31_y_0)[name = string("equal_31")]; tensor select_31 = select(a = shape_31_cast_fp16, b = concat_127, cond = equal_31)[name = string("select_31")]; tensor real_div_31 = real_div(x = select_31, y = shape_31_cast_fp16)[name = string("real_div_31")]; tensor hidden_states_723_cast_fp16 = tile(reps = real_div_31, x = var_3111_cast_fp16)[name = string("hidden_states_723_cast_fp16")]; tensor concat_128x = const()[name = string("concat_128x"), val = tensor([1, 16, -1, 128])]; tensor value_states_31_cast_fp16 = reshape(shape = concat_128x, x = hidden_states_723_cast_fp16)[name = string("value_states_31_cast_fp16")]; bool var_3122_transpose_x_1 = const()[name = string("op_3122_transpose_x_1"), val = bool(false)]; bool var_3122_transpose_y_1 = const()[name = string("op_3122_transpose_y_1"), val = bool(true)]; tensor var_3122_cast_fp16 = matmul(transpose_x = var_3122_transpose_x_1, transpose_y = var_3122_transpose_y_1, x = query_31_cast_fp16, y = key_states_31_cast_fp16)[name = string("op_3122_cast_fp16")]; fp16 var_3123_to_fp16 = const()[name = string("op_3123_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_3122_cast_fp16, y = var_3123_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor input_183_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_183_cast_fp16")]; tensor var_3126_cast_fp16 = softmax(axis = var_2987, x = input_183_cast_fp16)[name = string("op_3126_cast_fp16")]; bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_3126_cast_fp16, y = value_states_31_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_3130_perm_0 = const()[name = string("op_3130_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_302")]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_212, gather_213_cast_uint16_to_int32, var_2987))[name = string("concat_129")]; tensor var_3130_cast_fp16 = transpose(perm = var_3130_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_48")]; tensor var_3133_cast_fp16 = reshape(shape = concat_129, x = var_3130_cast_fp16)[name = string("op_3133_cast_fp16")]; tensor layers_15_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_15_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790961344)))]; tensor linear_108_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_o_proj_weight_to_fp16, x = var_3133_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor hidden_states_727_cast_fp16 = add(x = hidden_states_691_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_727_cast_fp16")]; fp16 var_2986_promoted_3_to_fp16 = const()[name = string("op_2986_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3140_cast_fp16 = pow(x = hidden_states_727_cast_fp16, y = var_2986_promoted_3_to_fp16)[name = string("op_3140_cast_fp16")]; tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([-1])]; bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; tensor variance_127_cast_fp16 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = var_3140_cast_fp16)[name = string("variance_127_cast_fp16")]; fp16 var_3143_to_fp16 = const()[name = string("op_3143_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3144_cast_fp16 = add(x = variance_127_cast_fp16, y = var_3143_to_fp16)[name = string("op_3144_cast_fp16")]; fp32 var_3145_epsilon_0 = const()[name = string("op_3145_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3145_cast_fp16 = rsqrt(epsilon = var_3145_epsilon_0, x = var_3144_cast_fp16)[name = string("op_3145_cast_fp16")]; tensor hidden_states_731_cast_fp16 = mul(x = hidden_states_727_cast_fp16, y = var_3145_cast_fp16)[name = string("hidden_states_731_cast_fp16")]; tensor layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795155712)))]; tensor input_189_cast_fp16 = mul(x = layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_731_cast_fp16)[name = string("input_189_cast_fp16")]; tensor layers_15_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_15_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795157824)))]; tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_gate_proj_weight_to_fp16, x = input_189_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor var_3157_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_3157_cast_fp16")]; tensor layers_15_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_15_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801449344)))]; tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_up_proj_weight_to_fp16, x = input_189_cast_fp16)[name = string("linear_110_cast_fp16")]; tensor input_193_cast_fp16 = mul(x = var_3157_cast_fp16, y = linear_110_cast_fp16)[name = string("input_193_cast_fp16")]; tensor layers_15_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_15_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807740864)))]; tensor linear_111_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_mlp_down_proj_weight_to_fp16, x = input_193_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor hidden_states_737_cast_fp16 = add(x = hidden_states_727_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_737_cast_fp16")]; int32 var_3174 = const()[name = string("op_3174"), val = int32(2)]; int32 var_3175 = const()[name = string("op_3175"), val = int32(-1)]; fp16 var_3174_promoted_to_fp16 = const()[name = string("op_3174_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3184_cast_fp16 = pow(x = hidden_states_737_cast_fp16, y = var_3174_promoted_to_fp16)[name = string("op_3184_cast_fp16")]; tensor variance_129_axes_0 = const()[name = string("variance_129_axes_0"), val = tensor([-1])]; bool variance_129_keep_dims_0 = const()[name = string("variance_129_keep_dims_0"), val = bool(true)]; tensor variance_129_cast_fp16 = reduce_mean(axes = variance_129_axes_0, keep_dims = variance_129_keep_dims_0, x = var_3184_cast_fp16)[name = string("variance_129_cast_fp16")]; fp16 var_3187_to_fp16 = const()[name = string("op_3187_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3188_cast_fp16 = add(x = variance_129_cast_fp16, y = var_3187_to_fp16)[name = string("op_3188_cast_fp16")]; fp32 var_3189_epsilon_0 = const()[name = string("op_3189_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3189_cast_fp16 = rsqrt(epsilon = var_3189_epsilon_0, x = var_3188_cast_fp16)[name = string("op_3189_cast_fp16")]; tensor hidden_states_741_cast_fp16 = mul(x = hidden_states_737_cast_fp16, y = var_3189_cast_fp16)[name = string("hidden_states_741_cast_fp16")]; tensor layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814032384)))]; tensor hidden_states_745_cast_fp16 = mul(x = layers_16_input_layernorm_weight_to_fp16, y = hidden_states_741_cast_fp16)[name = string("hidden_states_745_cast_fp16")]; tensor var_3202_shape_cast_fp16 = shape(x = hidden_states_745_cast_fp16)[name = string("op_3202_shape_cast_fp16")]; int32 gather_226 = const()[name = string("gather_226"), val = int32(1)]; int32 gather_227_axis_0 = const()[name = string("gather_227_axis_0"), val = int32(0)]; int32 gather_227_batch_dims_0 = const()[name = string("gather_227_batch_dims_0"), val = int32(0)]; bool gather_227_validate_indices_0 = const()[name = string("gather_227_validate_indices_0"), val = bool(false)]; string var_3202_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3202_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_227_indices_0_to_uint16 = const()[name = string("gather_227_indices_0_to_uint16"), val = uint16(1)]; tensor var_3202_shape_cast_fp16_to_uint16 = cast(dtype = var_3202_shape_cast_fp16_to_uint16_dtype_0, x = var_3202_shape_cast_fp16)[name = string("cast_297")]; uint16 gather_227_cast_uint16 = gather(axis = gather_227_axis_0, batch_dims = gather_227_batch_dims_0, indices = gather_227_indices_0_to_uint16, validate_indices = gather_227_validate_indices_0, x = var_3202_shape_cast_fp16_to_uint16)[name = string("gather_227_cast_uint16")]; string gather_227_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_227_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_16_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_16_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814034496)))]; tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_16_self_attn_q_proj_weight_to_fp16, x = hidden_states_745_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor concat_130x = const()[name = string("concat_130x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_747_cast_fp16 = reshape(shape = concat_130x, x = linear_112_cast_fp16)[name = string("hidden_states_747_cast_fp16")]; fp16 var_3174_promoted_1_to_fp16 = const()[name = string("op_3174_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_3210_cast_fp16 = pow(x = hidden_states_747_cast_fp16, y = var_3174_promoted_1_to_fp16)[name = string("op_3210_cast_fp16")]; tensor variance_131_axes_0 = const()[name = string("variance_131_axes_0"), val = tensor([-1])]; bool variance_131_keep_dims_0 = const()[name = string("variance_131_keep_dims_0"), val = bool(true)]; tensor variance_131_cast_fp16 = reduce_mean(axes = variance_131_axes_0, keep_dims = variance_131_keep_dims_0, x = var_3210_cast_fp16)[name = string("variance_131_cast_fp16")]; fp16 var_3213_to_fp16 = const()[name = string("op_3213_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3214_cast_fp16 = add(x = variance_131_cast_fp16, y = var_3213_to_fp16)[name = string("op_3214_cast_fp16")]; fp32 var_3215_epsilon_0 = const()[name = string("op_3215_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3215_cast_fp16 = rsqrt(epsilon = var_3215_epsilon_0, x = var_3214_cast_fp16)[name = string("op_3215_cast_fp16")]; tensor hidden_states_751_cast_fp16 = mul(x = hidden_states_747_cast_fp16, y = var_3215_cast_fp16)[name = string("hidden_states_751_cast_fp16")]; tensor layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818228864)))]; tensor var_3218_cast_fp16 = mul(x = layers_16_self_attn_q_norm_weight_to_fp16, y = hidden_states_751_cast_fp16)[name = string("op_3218_cast_fp16")]; tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_16_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_16_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818229184)))]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_k_proj_weight_to_fp16, x = hidden_states_745_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor concat_131x = const()[name = string("concat_131x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_755_cast_fp16 = reshape(shape = concat_131x, x = linear_113_cast_fp16)[name = string("hidden_states_755_cast_fp16")]; fp16 var_3174_promoted_2_to_fp16 = const()[name = string("op_3174_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_3226_cast_fp16 = pow(x = hidden_states_755_cast_fp16, y = var_3174_promoted_2_to_fp16)[name = string("op_3226_cast_fp16")]; tensor variance_133_axes_0 = const()[name = string("variance_133_axes_0"), val = tensor([-1])]; bool variance_133_keep_dims_0 = const()[name = string("variance_133_keep_dims_0"), val = bool(true)]; tensor variance_133_cast_fp16 = reduce_mean(axes = variance_133_axes_0, keep_dims = variance_133_keep_dims_0, x = var_3226_cast_fp16)[name = string("variance_133_cast_fp16")]; fp16 var_3229_to_fp16 = const()[name = string("op_3229_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3230_cast_fp16 = add(x = variance_133_cast_fp16, y = var_3229_to_fp16)[name = string("op_3230_cast_fp16")]; fp32 var_3231_epsilon_0 = const()[name = string("op_3231_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3231_cast_fp16 = rsqrt(epsilon = var_3231_epsilon_0, x = var_3230_cast_fp16)[name = string("op_3231_cast_fp16")]; tensor hidden_states_759_cast_fp16 = mul(x = hidden_states_755_cast_fp16, y = var_3231_cast_fp16)[name = string("hidden_states_759_cast_fp16")]; tensor layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820326400)))]; tensor var_3234_cast_fp16 = mul(x = layers_16_self_attn_k_norm_weight_to_fp16, y = hidden_states_759_cast_fp16)[name = string("op_3234_cast_fp16")]; tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_16_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_16_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820326720)))]; tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_v_proj_weight_to_fp16, x = hidden_states_745_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 8, 128])]; tensor var_3239_cast_fp16 = reshape(shape = concat_132x, x = linear_114_cast_fp16)[name = string("op_3239_cast_fp16")]; tensor hidden_states_767_perm_0 = const()[name = string("hidden_states_767_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_33_cast_fp16 = transpose(perm = q_33_perm_0, x = var_3218_cast_fp16)[name = string("transpose_47")]; tensor var_3243_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3243_cast_fp16")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3254_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3254_cast_fp16")]; bool var_3256_interleave_0 = const()[name = string("op_3256_interleave_0"), val = bool(false)]; tensor var_3256_cast_fp16 = concat(axis = var_3175, interleave = var_3256_interleave_0, values = (var_3254_cast_fp16, x1_65_cast_fp16))[name = string("op_3256_cast_fp16")]; tensor var_3257_cast_fp16 = mul(x = var_3256_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3257_cast_fp16")]; tensor query_33_cast_fp16 = add(x = var_3243_cast_fp16, y = var_3257_cast_fp16)[name = string("query_33_cast_fp16")]; tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = var_3234_cast_fp16)[name = string("transpose_46")]; tensor var_3259_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3259_cast_fp16")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3270_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3270_cast_fp16")]; bool var_3272_interleave_0 = const()[name = string("op_3272_interleave_0"), val = bool(false)]; tensor var_3272_cast_fp16 = concat(axis = var_3175, interleave = var_3272_interleave_0, values = (var_3270_cast_fp16, x1_67_cast_fp16))[name = string("op_3272_cast_fp16")]; tensor var_3273_cast_fp16 = mul(x = var_3272_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3273_cast_fp16")]; tensor hidden_states_763_cast_fp16 = add(x = var_3259_cast_fp16, y = var_3273_cast_fp16)[name = string("hidden_states_763_cast_fp16")]; tensor var_3275_shape_cast_fp16 = shape(x = hidden_states_763_cast_fp16)[name = string("op_3275_shape_cast_fp16")]; int32 gather_232 = const()[name = string("gather_232"), val = int32(1)]; int32 gather_233 = const()[name = string("gather_233"), val = int32(8)]; int32 gather_234_axis_0 = const()[name = string("gather_234_axis_0"), val = int32(0)]; int32 gather_234_batch_dims_0 = const()[name = string("gather_234_batch_dims_0"), val = int32(0)]; bool gather_234_validate_indices_0 = const()[name = string("gather_234_validate_indices_0"), val = bool(false)]; string var_3275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_234_indices_0_to_uint16 = const()[name = string("gather_234_indices_0_to_uint16"), val = uint16(2)]; tensor var_3275_shape_cast_fp16_to_uint16 = cast(dtype = var_3275_shape_cast_fp16_to_uint16_dtype_0, x = var_3275_shape_cast_fp16)[name = string("cast_295")]; uint16 gather_234_cast_uint16 = gather(axis = gather_234_axis_0, batch_dims = gather_234_batch_dims_0, indices = gather_234_indices_0_to_uint16, validate_indices = gather_234_validate_indices_0, x = var_3275_shape_cast_fp16_to_uint16)[name = string("gather_234_cast_uint16")]; string gather_234_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_234_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_235 = const()[name = string("gather_235"), val = int32(128)]; tensor var_3282_axes_0 = const()[name = string("op_3282_axes_0"), val = tensor([2])]; tensor var_3282_cast_fp16 = expand_dims(axes = var_3282_axes_0, x = hidden_states_763_cast_fp16)[name = string("op_3282_cast_fp16")]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; int32 gather_234_cast_uint16_to_int32 = cast(dtype = gather_234_cast_uint16_to_int32_dtype_0, x = gather_234_cast_uint16)[name = string("cast_294")]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (gather_232, gather_233, var_3174, gather_234_cast_uint16_to_int32, gather_235))[name = string("concat_133")]; tensor shape_32_cast_fp16 = shape(x = var_3282_cast_fp16)[name = string("shape_32_cast_fp16")]; int32 equal_32_y_0 = const()[name = string("equal_32_y_0"), val = int32(-1)]; tensor equal_32 = equal(x = concat_133, y = equal_32_y_0)[name = string("equal_32")]; tensor select_32 = select(a = shape_32_cast_fp16, b = concat_133, cond = equal_32)[name = string("select_32")]; tensor real_div_32 = real_div(x = select_32, y = shape_32_cast_fp16)[name = string("real_div_32")]; tensor hidden_states_765_cast_fp16 = tile(reps = real_div_32, x = var_3282_cast_fp16)[name = string("hidden_states_765_cast_fp16")]; tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, 16, -1, 128])]; tensor key_states_33_cast_fp16 = reshape(shape = concat_134x, x = hidden_states_765_cast_fp16)[name = string("key_states_33_cast_fp16")]; tensor hidden_states_767_cast_fp16 = transpose(perm = hidden_states_767_perm_0, x = var_3239_cast_fp16)[name = string("transpose_45")]; tensor var_3292_shape_cast_fp16 = shape(x = hidden_states_767_cast_fp16)[name = string("op_3292_shape_cast_fp16")]; int32 gather_236 = const()[name = string("gather_236"), val = int32(1)]; int32 gather_237 = const()[name = string("gather_237"), val = int32(8)]; int32 gather_238_axis_0 = const()[name = string("gather_238_axis_0"), val = int32(0)]; int32 gather_238_batch_dims_0 = const()[name = string("gather_238_batch_dims_0"), val = int32(0)]; bool gather_238_validate_indices_0 = const()[name = string("gather_238_validate_indices_0"), val = bool(false)]; string var_3292_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3292_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_238_indices_0_to_uint16 = const()[name = string("gather_238_indices_0_to_uint16"), val = uint16(2)]; tensor var_3292_shape_cast_fp16_to_uint16 = cast(dtype = var_3292_shape_cast_fp16_to_uint16_dtype_0, x = var_3292_shape_cast_fp16)[name = string("cast_293")]; uint16 gather_238_cast_uint16 = gather(axis = gather_238_axis_0, batch_dims = gather_238_batch_dims_0, indices = gather_238_indices_0_to_uint16, validate_indices = gather_238_validate_indices_0, x = var_3292_shape_cast_fp16_to_uint16)[name = string("gather_238_cast_uint16")]; string gather_238_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_238_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_239 = const()[name = string("gather_239"), val = int32(128)]; tensor var_3299_axes_0 = const()[name = string("op_3299_axes_0"), val = tensor([2])]; tensor var_3299_cast_fp16 = expand_dims(axes = var_3299_axes_0, x = hidden_states_767_cast_fp16)[name = string("op_3299_cast_fp16")]; int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; int32 gather_238_cast_uint16_to_int32 = cast(dtype = gather_238_cast_uint16_to_int32_dtype_0, x = gather_238_cast_uint16)[name = string("cast_292")]; tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (gather_236, gather_237, var_3174, gather_238_cast_uint16_to_int32, gather_239))[name = string("concat_135")]; tensor shape_33_cast_fp16 = shape(x = var_3299_cast_fp16)[name = string("shape_33_cast_fp16")]; int32 equal_33_y_0 = const()[name = string("equal_33_y_0"), val = int32(-1)]; tensor equal_33 = equal(x = concat_135, y = equal_33_y_0)[name = string("equal_33")]; tensor select_33 = select(a = shape_33_cast_fp16, b = concat_135, cond = equal_33)[name = string("select_33")]; tensor real_div_33 = real_div(x = select_33, y = shape_33_cast_fp16)[name = string("real_div_33")]; tensor hidden_states_769_cast_fp16 = tile(reps = real_div_33, x = var_3299_cast_fp16)[name = string("hidden_states_769_cast_fp16")]; tensor concat_136x = const()[name = string("concat_136x"), val = tensor([1, 16, -1, 128])]; tensor value_states_33_cast_fp16 = reshape(shape = concat_136x, x = hidden_states_769_cast_fp16)[name = string("value_states_33_cast_fp16")]; bool var_3310_transpose_x_1 = const()[name = string("op_3310_transpose_x_1"), val = bool(false)]; bool var_3310_transpose_y_1 = const()[name = string("op_3310_transpose_y_1"), val = bool(true)]; tensor var_3310_cast_fp16 = matmul(transpose_x = var_3310_transpose_x_1, transpose_y = var_3310_transpose_y_1, x = query_33_cast_fp16, y = key_states_33_cast_fp16)[name = string("op_3310_cast_fp16")]; fp16 var_3311_to_fp16 = const()[name = string("op_3311_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_65_cast_fp16 = mul(x = var_3310_cast_fp16, y = var_3311_to_fp16)[name = string("attn_weights_65_cast_fp16")]; tensor input_195_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_195_cast_fp16")]; tensor var_3314_cast_fp16 = softmax(axis = var_3175, x = input_195_cast_fp16)[name = string("op_3314_cast_fp16")]; bool attn_output_65_transpose_x_0 = const()[name = string("attn_output_65_transpose_x_0"), val = bool(false)]; bool attn_output_65_transpose_y_0 = const()[name = string("attn_output_65_transpose_y_0"), val = bool(false)]; tensor attn_output_65_cast_fp16 = matmul(transpose_x = attn_output_65_transpose_x_0, transpose_y = attn_output_65_transpose_y_0, x = var_3314_cast_fp16, y = value_states_33_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_3318_perm_0 = const()[name = string("op_3318_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; int32 gather_227_cast_uint16_to_int32 = cast(dtype = gather_227_cast_uint16_to_int32_dtype_0, x = gather_227_cast_uint16)[name = string("cast_296")]; tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (gather_226, gather_227_cast_uint16_to_int32, var_3175))[name = string("concat_137")]; tensor var_3318_cast_fp16 = transpose(perm = var_3318_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_44")]; tensor var_3321_cast_fp16 = reshape(shape = concat_137, x = var_3318_cast_fp16)[name = string("op_3321_cast_fp16")]; tensor layers_16_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_16_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(822423936)))]; tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_o_proj_weight_to_fp16, x = var_3321_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor hidden_states_773_cast_fp16 = add(x = hidden_states_737_cast_fp16, y = linear_115_cast_fp16)[name = string("hidden_states_773_cast_fp16")]; fp16 var_3174_promoted_3_to_fp16 = const()[name = string("op_3174_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3328_cast_fp16 = pow(x = hidden_states_773_cast_fp16, y = var_3174_promoted_3_to_fp16)[name = string("op_3328_cast_fp16")]; tensor variance_135_axes_0 = const()[name = string("variance_135_axes_0"), val = tensor([-1])]; bool variance_135_keep_dims_0 = const()[name = string("variance_135_keep_dims_0"), val = bool(true)]; tensor variance_135_cast_fp16 = reduce_mean(axes = variance_135_axes_0, keep_dims = variance_135_keep_dims_0, x = var_3328_cast_fp16)[name = string("variance_135_cast_fp16")]; fp16 var_3331_to_fp16 = const()[name = string("op_3331_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3332_cast_fp16 = add(x = variance_135_cast_fp16, y = var_3331_to_fp16)[name = string("op_3332_cast_fp16")]; fp32 var_3333_epsilon_0 = const()[name = string("op_3333_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3333_cast_fp16 = rsqrt(epsilon = var_3333_epsilon_0, x = var_3332_cast_fp16)[name = string("op_3333_cast_fp16")]; tensor hidden_states_777_cast_fp16 = mul(x = hidden_states_773_cast_fp16, y = var_3333_cast_fp16)[name = string("hidden_states_777_cast_fp16")]; tensor layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826618304)))]; tensor input_201_cast_fp16 = mul(x = layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_777_cast_fp16)[name = string("input_201_cast_fp16")]; tensor layers_16_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_16_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826620416)))]; tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_gate_proj_weight_to_fp16, x = input_201_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor var_3345_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_3345_cast_fp16")]; tensor layers_16_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_16_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832911936)))]; tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_up_proj_weight_to_fp16, x = input_201_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor input_205_cast_fp16 = mul(x = var_3345_cast_fp16, y = linear_117_cast_fp16)[name = string("input_205_cast_fp16")]; tensor layers_16_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_16_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839203456)))]; tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_mlp_down_proj_weight_to_fp16, x = input_205_cast_fp16)[name = string("linear_118_cast_fp16")]; tensor hidden_states_783_cast_fp16 = add(x = hidden_states_773_cast_fp16, y = linear_118_cast_fp16)[name = string("hidden_states_783_cast_fp16")]; int32 var_3362 = const()[name = string("op_3362"), val = int32(2)]; int32 var_3363 = const()[name = string("op_3363"), val = int32(-1)]; fp16 var_3362_promoted_to_fp16 = const()[name = string("op_3362_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3372_cast_fp16 = pow(x = hidden_states_783_cast_fp16, y = var_3362_promoted_to_fp16)[name = string("op_3372_cast_fp16")]; tensor variance_137_axes_0 = const()[name = string("variance_137_axes_0"), val = tensor([-1])]; bool variance_137_keep_dims_0 = const()[name = string("variance_137_keep_dims_0"), val = bool(true)]; tensor variance_137_cast_fp16 = reduce_mean(axes = variance_137_axes_0, keep_dims = variance_137_keep_dims_0, x = var_3372_cast_fp16)[name = string("variance_137_cast_fp16")]; fp16 var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3376_cast_fp16 = add(x = variance_137_cast_fp16, y = var_3375_to_fp16)[name = string("op_3376_cast_fp16")]; fp32 var_3377_epsilon_0 = const()[name = string("op_3377_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3377_cast_fp16 = rsqrt(epsilon = var_3377_epsilon_0, x = var_3376_cast_fp16)[name = string("op_3377_cast_fp16")]; tensor hidden_states_787_cast_fp16 = mul(x = hidden_states_783_cast_fp16, y = var_3377_cast_fp16)[name = string("hidden_states_787_cast_fp16")]; tensor layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845494976)))]; tensor hidden_states_791_cast_fp16 = mul(x = layers_17_input_layernorm_weight_to_fp16, y = hidden_states_787_cast_fp16)[name = string("hidden_states_791_cast_fp16")]; tensor var_3390_shape_cast_fp16 = shape(x = hidden_states_791_cast_fp16)[name = string("op_3390_shape_cast_fp16")]; int32 gather_240 = const()[name = string("gather_240"), val = int32(1)]; int32 gather_241_axis_0 = const()[name = string("gather_241_axis_0"), val = int32(0)]; int32 gather_241_batch_dims_0 = const()[name = string("gather_241_batch_dims_0"), val = int32(0)]; bool gather_241_validate_indices_0 = const()[name = string("gather_241_validate_indices_0"), val = bool(false)]; string var_3390_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3390_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_241_indices_0_to_uint16 = const()[name = string("gather_241_indices_0_to_uint16"), val = uint16(1)]; tensor var_3390_shape_cast_fp16_to_uint16 = cast(dtype = var_3390_shape_cast_fp16_to_uint16_dtype_0, x = var_3390_shape_cast_fp16)[name = string("cast_291")]; uint16 gather_241_cast_uint16 = gather(axis = gather_241_axis_0, batch_dims = gather_241_batch_dims_0, indices = gather_241_indices_0_to_uint16, validate_indices = gather_241_validate_indices_0, x = var_3390_shape_cast_fp16_to_uint16)[name = string("gather_241_cast_uint16")]; string gather_241_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_241_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_17_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_17_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845497088)))]; tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_17_self_attn_q_proj_weight_to_fp16, x = hidden_states_791_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor concat_138x = const()[name = string("concat_138x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_793_cast_fp16 = reshape(shape = concat_138x, x = linear_119_cast_fp16)[name = string("hidden_states_793_cast_fp16")]; fp16 var_3362_promoted_1_to_fp16 = const()[name = string("op_3362_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_3398_cast_fp16 = pow(x = hidden_states_793_cast_fp16, y = var_3362_promoted_1_to_fp16)[name = string("op_3398_cast_fp16")]; tensor variance_139_axes_0 = const()[name = string("variance_139_axes_0"), val = tensor([-1])]; bool variance_139_keep_dims_0 = const()[name = string("variance_139_keep_dims_0"), val = bool(true)]; tensor variance_139_cast_fp16 = reduce_mean(axes = variance_139_axes_0, keep_dims = variance_139_keep_dims_0, x = var_3398_cast_fp16)[name = string("variance_139_cast_fp16")]; fp16 var_3401_to_fp16 = const()[name = string("op_3401_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3402_cast_fp16 = add(x = variance_139_cast_fp16, y = var_3401_to_fp16)[name = string("op_3402_cast_fp16")]; fp32 var_3403_epsilon_0 = const()[name = string("op_3403_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3403_cast_fp16 = rsqrt(epsilon = var_3403_epsilon_0, x = var_3402_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor hidden_states_797_cast_fp16 = mul(x = hidden_states_793_cast_fp16, y = var_3403_cast_fp16)[name = string("hidden_states_797_cast_fp16")]; tensor layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849691456)))]; tensor var_3406_cast_fp16 = mul(x = layers_17_self_attn_q_norm_weight_to_fp16, y = hidden_states_797_cast_fp16)[name = string("op_3406_cast_fp16")]; tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_17_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_17_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849691776)))]; tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_k_proj_weight_to_fp16, x = hidden_states_791_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor concat_139x = const()[name = string("concat_139x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_801_cast_fp16 = reshape(shape = concat_139x, x = linear_120_cast_fp16)[name = string("hidden_states_801_cast_fp16")]; fp16 var_3362_promoted_2_to_fp16 = const()[name = string("op_3362_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_3414_cast_fp16 = pow(x = hidden_states_801_cast_fp16, y = var_3362_promoted_2_to_fp16)[name = string("op_3414_cast_fp16")]; tensor variance_141_axes_0 = const()[name = string("variance_141_axes_0"), val = tensor([-1])]; bool variance_141_keep_dims_0 = const()[name = string("variance_141_keep_dims_0"), val = bool(true)]; tensor variance_141_cast_fp16 = reduce_mean(axes = variance_141_axes_0, keep_dims = variance_141_keep_dims_0, x = var_3414_cast_fp16)[name = string("variance_141_cast_fp16")]; fp16 var_3417_to_fp16 = const()[name = string("op_3417_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3418_cast_fp16 = add(x = variance_141_cast_fp16, y = var_3417_to_fp16)[name = string("op_3418_cast_fp16")]; fp32 var_3419_epsilon_0 = const()[name = string("op_3419_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3419_cast_fp16 = rsqrt(epsilon = var_3419_epsilon_0, x = var_3418_cast_fp16)[name = string("op_3419_cast_fp16")]; tensor hidden_states_805_cast_fp16 = mul(x = hidden_states_801_cast_fp16, y = var_3419_cast_fp16)[name = string("hidden_states_805_cast_fp16")]; tensor layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851788992)))]; tensor var_3422_cast_fp16 = mul(x = layers_17_self_attn_k_norm_weight_to_fp16, y = hidden_states_805_cast_fp16)[name = string("op_3422_cast_fp16")]; tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_17_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_17_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851789312)))]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_v_proj_weight_to_fp16, x = hidden_states_791_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor concat_140x = const()[name = string("concat_140x"), val = tensor([1, -1, 8, 128])]; tensor var_3427_cast_fp16 = reshape(shape = concat_140x, x = linear_121_cast_fp16)[name = string("op_3427_cast_fp16")]; tensor hidden_states_813_perm_0 = const()[name = string("hidden_states_813_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_35_cast_fp16 = transpose(perm = q_35_perm_0, x = var_3406_cast_fp16)[name = string("transpose_43")]; tensor var_3431_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3431_cast_fp16")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3442_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3442_cast_fp16")]; bool var_3444_interleave_0 = const()[name = string("op_3444_interleave_0"), val = bool(false)]; tensor var_3444_cast_fp16 = concat(axis = var_3363, interleave = var_3444_interleave_0, values = (var_3442_cast_fp16, x1_69_cast_fp16))[name = string("op_3444_cast_fp16")]; tensor var_3445_cast_fp16 = mul(x = var_3444_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3445_cast_fp16")]; tensor query_35_cast_fp16 = add(x = var_3431_cast_fp16, y = var_3445_cast_fp16)[name = string("query_35_cast_fp16")]; tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = var_3422_cast_fp16)[name = string("transpose_42")]; tensor var_3447_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3447_cast_fp16")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3458_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3458_cast_fp16")]; bool var_3460_interleave_0 = const()[name = string("op_3460_interleave_0"), val = bool(false)]; tensor var_3460_cast_fp16 = concat(axis = var_3363, interleave = var_3460_interleave_0, values = (var_3458_cast_fp16, x1_71_cast_fp16))[name = string("op_3460_cast_fp16")]; tensor var_3461_cast_fp16 = mul(x = var_3460_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3461_cast_fp16")]; tensor hidden_states_809_cast_fp16 = add(x = var_3447_cast_fp16, y = var_3461_cast_fp16)[name = string("hidden_states_809_cast_fp16")]; tensor var_3463_shape_cast_fp16 = shape(x = hidden_states_809_cast_fp16)[name = string("op_3463_shape_cast_fp16")]; int32 gather_246 = const()[name = string("gather_246"), val = int32(1)]; int32 gather_247 = const()[name = string("gather_247"), val = int32(8)]; int32 gather_248_axis_0 = const()[name = string("gather_248_axis_0"), val = int32(0)]; int32 gather_248_batch_dims_0 = const()[name = string("gather_248_batch_dims_0"), val = int32(0)]; bool gather_248_validate_indices_0 = const()[name = string("gather_248_validate_indices_0"), val = bool(false)]; string var_3463_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3463_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_248_indices_0_to_uint16 = const()[name = string("gather_248_indices_0_to_uint16"), val = uint16(2)]; tensor var_3463_shape_cast_fp16_to_uint16 = cast(dtype = var_3463_shape_cast_fp16_to_uint16_dtype_0, x = var_3463_shape_cast_fp16)[name = string("cast_289")]; uint16 gather_248_cast_uint16 = gather(axis = gather_248_axis_0, batch_dims = gather_248_batch_dims_0, indices = gather_248_indices_0_to_uint16, validate_indices = gather_248_validate_indices_0, x = var_3463_shape_cast_fp16_to_uint16)[name = string("gather_248_cast_uint16")]; string gather_248_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_248_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_249 = const()[name = string("gather_249"), val = int32(128)]; tensor var_3470_axes_0 = const()[name = string("op_3470_axes_0"), val = tensor([2])]; tensor var_3470_cast_fp16 = expand_dims(axes = var_3470_axes_0, x = hidden_states_809_cast_fp16)[name = string("op_3470_cast_fp16")]; int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)]; bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)]; int32 gather_248_cast_uint16_to_int32 = cast(dtype = gather_248_cast_uint16_to_int32_dtype_0, x = gather_248_cast_uint16)[name = string("cast_288")]; tensor concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (gather_246, gather_247, var_3362, gather_248_cast_uint16_to_int32, gather_249))[name = string("concat_141")]; tensor shape_34_cast_fp16 = shape(x = var_3470_cast_fp16)[name = string("shape_34_cast_fp16")]; int32 equal_34_y_0 = const()[name = string("equal_34_y_0"), val = int32(-1)]; tensor equal_34 = equal(x = concat_141, y = equal_34_y_0)[name = string("equal_34")]; tensor select_34 = select(a = shape_34_cast_fp16, b = concat_141, cond = equal_34)[name = string("select_34")]; tensor real_div_34 = real_div(x = select_34, y = shape_34_cast_fp16)[name = string("real_div_34")]; tensor hidden_states_811_cast_fp16 = tile(reps = real_div_34, x = var_3470_cast_fp16)[name = string("hidden_states_811_cast_fp16")]; tensor concat_142x = const()[name = string("concat_142x"), val = tensor([1, 16, -1, 128])]; tensor key_states_35_cast_fp16 = reshape(shape = concat_142x, x = hidden_states_811_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor hidden_states_813_cast_fp16 = transpose(perm = hidden_states_813_perm_0, x = var_3427_cast_fp16)[name = string("transpose_41")]; tensor var_3480_shape_cast_fp16 = shape(x = hidden_states_813_cast_fp16)[name = string("op_3480_shape_cast_fp16")]; int32 gather_250 = const()[name = string("gather_250"), val = int32(1)]; int32 gather_251 = const()[name = string("gather_251"), val = int32(8)]; int32 gather_252_axis_0 = const()[name = string("gather_252_axis_0"), val = int32(0)]; int32 gather_252_batch_dims_0 = const()[name = string("gather_252_batch_dims_0"), val = int32(0)]; bool gather_252_validate_indices_0 = const()[name = string("gather_252_validate_indices_0"), val = bool(false)]; string var_3480_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3480_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_252_indices_0_to_uint16 = const()[name = string("gather_252_indices_0_to_uint16"), val = uint16(2)]; tensor var_3480_shape_cast_fp16_to_uint16 = cast(dtype = var_3480_shape_cast_fp16_to_uint16_dtype_0, x = var_3480_shape_cast_fp16)[name = string("cast_287")]; uint16 gather_252_cast_uint16 = gather(axis = gather_252_axis_0, batch_dims = gather_252_batch_dims_0, indices = gather_252_indices_0_to_uint16, validate_indices = gather_252_validate_indices_0, x = var_3480_shape_cast_fp16_to_uint16)[name = string("gather_252_cast_uint16")]; string gather_252_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_252_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_253 = const()[name = string("gather_253"), val = int32(128)]; tensor var_3487_axes_0 = const()[name = string("op_3487_axes_0"), val = tensor([2])]; tensor var_3487_cast_fp16 = expand_dims(axes = var_3487_axes_0, x = hidden_states_813_cast_fp16)[name = string("op_3487_cast_fp16")]; int32 concat_143_axis_0 = const()[name = string("concat_143_axis_0"), val = int32(0)]; bool concat_143_interleave_0 = const()[name = string("concat_143_interleave_0"), val = bool(false)]; int32 gather_252_cast_uint16_to_int32 = cast(dtype = gather_252_cast_uint16_to_int32_dtype_0, x = gather_252_cast_uint16)[name = string("cast_286")]; tensor concat_143 = concat(axis = concat_143_axis_0, interleave = concat_143_interleave_0, values = (gather_250, gather_251, var_3362, gather_252_cast_uint16_to_int32, gather_253))[name = string("concat_143")]; tensor shape_35_cast_fp16 = shape(x = var_3487_cast_fp16)[name = string("shape_35_cast_fp16")]; int32 equal_35_y_0 = const()[name = string("equal_35_y_0"), val = int32(-1)]; tensor equal_35 = equal(x = concat_143, y = equal_35_y_0)[name = string("equal_35")]; tensor select_35 = select(a = shape_35_cast_fp16, b = concat_143, cond = equal_35)[name = string("select_35")]; tensor real_div_35 = real_div(x = select_35, y = shape_35_cast_fp16)[name = string("real_div_35")]; tensor hidden_states_815_cast_fp16 = tile(reps = real_div_35, x = var_3487_cast_fp16)[name = string("hidden_states_815_cast_fp16")]; tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, 16, -1, 128])]; tensor value_states_35_cast_fp16 = reshape(shape = concat_144x, x = hidden_states_815_cast_fp16)[name = string("value_states_35_cast_fp16")]; bool var_3498_transpose_x_1 = const()[name = string("op_3498_transpose_x_1"), val = bool(false)]; bool var_3498_transpose_y_1 = const()[name = string("op_3498_transpose_y_1"), val = bool(true)]; tensor var_3498_cast_fp16 = matmul(transpose_x = var_3498_transpose_x_1, transpose_y = var_3498_transpose_y_1, x = query_35_cast_fp16, y = key_states_35_cast_fp16)[name = string("op_3498_cast_fp16")]; fp16 var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_69_cast_fp16 = mul(x = var_3498_cast_fp16, y = var_3499_to_fp16)[name = string("attn_weights_69_cast_fp16")]; tensor input_207_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_207_cast_fp16")]; tensor var_3502_cast_fp16 = softmax(axis = var_3363, x = input_207_cast_fp16)[name = string("op_3502_cast_fp16")]; bool attn_output_69_transpose_x_0 = const()[name = string("attn_output_69_transpose_x_0"), val = bool(false)]; bool attn_output_69_transpose_y_0 = const()[name = string("attn_output_69_transpose_y_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = matmul(transpose_x = attn_output_69_transpose_x_0, transpose_y = attn_output_69_transpose_y_0, x = var_3502_cast_fp16, y = value_states_35_cast_fp16)[name = string("attn_output_69_cast_fp16")]; tensor var_3506_perm_0 = const()[name = string("op_3506_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_145_axis_0 = const()[name = string("concat_145_axis_0"), val = int32(0)]; bool concat_145_interleave_0 = const()[name = string("concat_145_interleave_0"), val = bool(false)]; int32 gather_241_cast_uint16_to_int32 = cast(dtype = gather_241_cast_uint16_to_int32_dtype_0, x = gather_241_cast_uint16)[name = string("cast_290")]; tensor concat_145 = concat(axis = concat_145_axis_0, interleave = concat_145_interleave_0, values = (gather_240, gather_241_cast_uint16_to_int32, var_3363))[name = string("concat_145")]; tensor var_3506_cast_fp16 = transpose(perm = var_3506_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_40")]; tensor var_3509_cast_fp16 = reshape(shape = concat_145, x = var_3506_cast_fp16)[name = string("op_3509_cast_fp16")]; tensor layers_17_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_17_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(853886528)))]; tensor linear_122_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_o_proj_weight_to_fp16, x = var_3509_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor hidden_states_819_cast_fp16 = add(x = hidden_states_783_cast_fp16, y = linear_122_cast_fp16)[name = string("hidden_states_819_cast_fp16")]; fp16 var_3362_promoted_3_to_fp16 = const()[name = string("op_3362_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3516_cast_fp16 = pow(x = hidden_states_819_cast_fp16, y = var_3362_promoted_3_to_fp16)[name = string("op_3516_cast_fp16")]; tensor variance_143_axes_0 = const()[name = string("variance_143_axes_0"), val = tensor([-1])]; bool variance_143_keep_dims_0 = const()[name = string("variance_143_keep_dims_0"), val = bool(true)]; tensor variance_143_cast_fp16 = reduce_mean(axes = variance_143_axes_0, keep_dims = variance_143_keep_dims_0, x = var_3516_cast_fp16)[name = string("variance_143_cast_fp16")]; fp16 var_3519_to_fp16 = const()[name = string("op_3519_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3520_cast_fp16 = add(x = variance_143_cast_fp16, y = var_3519_to_fp16)[name = string("op_3520_cast_fp16")]; fp32 var_3521_epsilon_0 = const()[name = string("op_3521_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3521_cast_fp16 = rsqrt(epsilon = var_3521_epsilon_0, x = var_3520_cast_fp16)[name = string("op_3521_cast_fp16")]; tensor hidden_states_823_cast_fp16 = mul(x = hidden_states_819_cast_fp16, y = var_3521_cast_fp16)[name = string("hidden_states_823_cast_fp16")]; tensor layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858080896)))]; tensor input_213_cast_fp16 = mul(x = layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_823_cast_fp16)[name = string("input_213_cast_fp16")]; tensor layers_17_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_17_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858083008)))]; tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_gate_proj_weight_to_fp16, x = input_213_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor var_3533_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_3533_cast_fp16")]; tensor layers_17_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_17_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(864374528)))]; tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_up_proj_weight_to_fp16, x = input_213_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor input_217_cast_fp16 = mul(x = var_3533_cast_fp16, y = linear_124_cast_fp16)[name = string("input_217_cast_fp16")]; tensor layers_17_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_17_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(870666048)))]; tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_mlp_down_proj_weight_to_fp16, x = input_217_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor hidden_states_829_cast_fp16 = add(x = hidden_states_819_cast_fp16, y = linear_125_cast_fp16)[name = string("hidden_states_829_cast_fp16")]; int32 var_3550 = const()[name = string("op_3550"), val = int32(2)]; int32 var_3551 = const()[name = string("op_3551"), val = int32(-1)]; fp16 var_3550_promoted_to_fp16 = const()[name = string("op_3550_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3560_cast_fp16 = pow(x = hidden_states_829_cast_fp16, y = var_3550_promoted_to_fp16)[name = string("op_3560_cast_fp16")]; tensor variance_145_axes_0 = const()[name = string("variance_145_axes_0"), val = tensor([-1])]; bool variance_145_keep_dims_0 = const()[name = string("variance_145_keep_dims_0"), val = bool(true)]; tensor variance_145_cast_fp16 = reduce_mean(axes = variance_145_axes_0, keep_dims = variance_145_keep_dims_0, x = var_3560_cast_fp16)[name = string("variance_145_cast_fp16")]; fp16 var_3563_to_fp16 = const()[name = string("op_3563_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3564_cast_fp16 = add(x = variance_145_cast_fp16, y = var_3563_to_fp16)[name = string("op_3564_cast_fp16")]; fp32 var_3565_epsilon_0 = const()[name = string("op_3565_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3565_cast_fp16 = rsqrt(epsilon = var_3565_epsilon_0, x = var_3564_cast_fp16)[name = string("op_3565_cast_fp16")]; tensor hidden_states_833_cast_fp16 = mul(x = hidden_states_829_cast_fp16, y = var_3565_cast_fp16)[name = string("hidden_states_833_cast_fp16")]; tensor layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876957568)))]; tensor hidden_states_837_cast_fp16 = mul(x = layers_18_input_layernorm_weight_to_fp16, y = hidden_states_833_cast_fp16)[name = string("hidden_states_837_cast_fp16")]; tensor var_3578_shape_cast_fp16 = shape(x = hidden_states_837_cast_fp16)[name = string("op_3578_shape_cast_fp16")]; int32 gather_254 = const()[name = string("gather_254"), val = int32(1)]; int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; string var_3578_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3578_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_255_indices_0_to_uint16 = const()[name = string("gather_255_indices_0_to_uint16"), val = uint16(1)]; tensor var_3578_shape_cast_fp16_to_uint16 = cast(dtype = var_3578_shape_cast_fp16_to_uint16_dtype_0, x = var_3578_shape_cast_fp16)[name = string("cast_285")]; uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = gather_255_indices_0_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_3578_shape_cast_fp16_to_uint16)[name = string("gather_255_cast_uint16")]; string gather_255_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_255_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_18_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_18_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876959680)))]; tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_18_self_attn_q_proj_weight_to_fp16, x = hidden_states_837_cast_fp16)[name = string("linear_126_cast_fp16")]; tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_839_cast_fp16 = reshape(shape = concat_146x, x = linear_126_cast_fp16)[name = string("hidden_states_839_cast_fp16")]; fp16 var_3550_promoted_1_to_fp16 = const()[name = string("op_3550_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_3586_cast_fp16 = pow(x = hidden_states_839_cast_fp16, y = var_3550_promoted_1_to_fp16)[name = string("op_3586_cast_fp16")]; tensor variance_147_axes_0 = const()[name = string("variance_147_axes_0"), val = tensor([-1])]; bool variance_147_keep_dims_0 = const()[name = string("variance_147_keep_dims_0"), val = bool(true)]; tensor variance_147_cast_fp16 = reduce_mean(axes = variance_147_axes_0, keep_dims = variance_147_keep_dims_0, x = var_3586_cast_fp16)[name = string("variance_147_cast_fp16")]; fp16 var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3590_cast_fp16 = add(x = variance_147_cast_fp16, y = var_3589_to_fp16)[name = string("op_3590_cast_fp16")]; fp32 var_3591_epsilon_0 = const()[name = string("op_3591_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3591_cast_fp16 = rsqrt(epsilon = var_3591_epsilon_0, x = var_3590_cast_fp16)[name = string("op_3591_cast_fp16")]; tensor hidden_states_843_cast_fp16 = mul(x = hidden_states_839_cast_fp16, y = var_3591_cast_fp16)[name = string("hidden_states_843_cast_fp16")]; tensor layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881154048)))]; tensor var_3594_cast_fp16 = mul(x = layers_18_self_attn_q_norm_weight_to_fp16, y = hidden_states_843_cast_fp16)[name = string("op_3594_cast_fp16")]; tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_18_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_18_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881154368)))]; tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_k_proj_weight_to_fp16, x = hidden_states_837_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor concat_147x = const()[name = string("concat_147x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_847_cast_fp16 = reshape(shape = concat_147x, x = linear_127_cast_fp16)[name = string("hidden_states_847_cast_fp16")]; fp16 var_3550_promoted_2_to_fp16 = const()[name = string("op_3550_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_3602_cast_fp16 = pow(x = hidden_states_847_cast_fp16, y = var_3550_promoted_2_to_fp16)[name = string("op_3602_cast_fp16")]; tensor variance_149_axes_0 = const()[name = string("variance_149_axes_0"), val = tensor([-1])]; bool variance_149_keep_dims_0 = const()[name = string("variance_149_keep_dims_0"), val = bool(true)]; tensor variance_149_cast_fp16 = reduce_mean(axes = variance_149_axes_0, keep_dims = variance_149_keep_dims_0, x = var_3602_cast_fp16)[name = string("variance_149_cast_fp16")]; fp16 var_3605_to_fp16 = const()[name = string("op_3605_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3606_cast_fp16 = add(x = variance_149_cast_fp16, y = var_3605_to_fp16)[name = string("op_3606_cast_fp16")]; fp32 var_3607_epsilon_0 = const()[name = string("op_3607_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3607_cast_fp16 = rsqrt(epsilon = var_3607_epsilon_0, x = var_3606_cast_fp16)[name = string("op_3607_cast_fp16")]; tensor hidden_states_851_cast_fp16 = mul(x = hidden_states_847_cast_fp16, y = var_3607_cast_fp16)[name = string("hidden_states_851_cast_fp16")]; tensor layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(883251584)))]; tensor var_3610_cast_fp16 = mul(x = layers_18_self_attn_k_norm_weight_to_fp16, y = hidden_states_851_cast_fp16)[name = string("op_3610_cast_fp16")]; tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_18_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_18_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(883251904)))]; tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_v_proj_weight_to_fp16, x = hidden_states_837_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor concat_148x = const()[name = string("concat_148x"), val = tensor([1, -1, 8, 128])]; tensor var_3615_cast_fp16 = reshape(shape = concat_148x, x = linear_128_cast_fp16)[name = string("op_3615_cast_fp16")]; tensor hidden_states_859_perm_0 = const()[name = string("hidden_states_859_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = var_3594_cast_fp16)[name = string("transpose_39")]; tensor var_3619_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3619_cast_fp16")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3630_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_3630_cast_fp16")]; bool var_3632_interleave_0 = const()[name = string("op_3632_interleave_0"), val = bool(false)]; tensor var_3632_cast_fp16 = concat(axis = var_3551, interleave = var_3632_interleave_0, values = (var_3630_cast_fp16, x1_73_cast_fp16))[name = string("op_3632_cast_fp16")]; tensor var_3633_cast_fp16 = mul(x = var_3632_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3633_cast_fp16")]; tensor query_37_cast_fp16 = add(x = var_3619_cast_fp16, y = var_3633_cast_fp16)[name = string("query_37_cast_fp16")]; tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = var_3610_cast_fp16)[name = string("transpose_38")]; tensor var_3635_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3635_cast_fp16")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3646_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_3646_cast_fp16")]; bool var_3648_interleave_0 = const()[name = string("op_3648_interleave_0"), val = bool(false)]; tensor var_3648_cast_fp16 = concat(axis = var_3551, interleave = var_3648_interleave_0, values = (var_3646_cast_fp16, x1_75_cast_fp16))[name = string("op_3648_cast_fp16")]; tensor var_3649_cast_fp16 = mul(x = var_3648_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3649_cast_fp16")]; tensor hidden_states_855_cast_fp16 = add(x = var_3635_cast_fp16, y = var_3649_cast_fp16)[name = string("hidden_states_855_cast_fp16")]; tensor var_3651_shape_cast_fp16 = shape(x = hidden_states_855_cast_fp16)[name = string("op_3651_shape_cast_fp16")]; int32 gather_260 = const()[name = string("gather_260"), val = int32(1)]; int32 gather_261 = const()[name = string("gather_261"), val = int32(8)]; int32 gather_262_axis_0 = const()[name = string("gather_262_axis_0"), val = int32(0)]; int32 gather_262_batch_dims_0 = const()[name = string("gather_262_batch_dims_0"), val = int32(0)]; bool gather_262_validate_indices_0 = const()[name = string("gather_262_validate_indices_0"), val = bool(false)]; string var_3651_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3651_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_262_indices_0_to_uint16 = const()[name = string("gather_262_indices_0_to_uint16"), val = uint16(2)]; tensor var_3651_shape_cast_fp16_to_uint16 = cast(dtype = var_3651_shape_cast_fp16_to_uint16_dtype_0, x = var_3651_shape_cast_fp16)[name = string("cast_283")]; uint16 gather_262_cast_uint16 = gather(axis = gather_262_axis_0, batch_dims = gather_262_batch_dims_0, indices = gather_262_indices_0_to_uint16, validate_indices = gather_262_validate_indices_0, x = var_3651_shape_cast_fp16_to_uint16)[name = string("gather_262_cast_uint16")]; string gather_262_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_262_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_263 = const()[name = string("gather_263"), val = int32(128)]; tensor var_3658_axes_0 = const()[name = string("op_3658_axes_0"), val = tensor([2])]; tensor var_3658_cast_fp16 = expand_dims(axes = var_3658_axes_0, x = hidden_states_855_cast_fp16)[name = string("op_3658_cast_fp16")]; int32 concat_149_axis_0 = const()[name = string("concat_149_axis_0"), val = int32(0)]; bool concat_149_interleave_0 = const()[name = string("concat_149_interleave_0"), val = bool(false)]; int32 gather_262_cast_uint16_to_int32 = cast(dtype = gather_262_cast_uint16_to_int32_dtype_0, x = gather_262_cast_uint16)[name = string("cast_282")]; tensor concat_149 = concat(axis = concat_149_axis_0, interleave = concat_149_interleave_0, values = (gather_260, gather_261, var_3550, gather_262_cast_uint16_to_int32, gather_263))[name = string("concat_149")]; tensor shape_36_cast_fp16 = shape(x = var_3658_cast_fp16)[name = string("shape_36_cast_fp16")]; int32 equal_36_y_0 = const()[name = string("equal_36_y_0"), val = int32(-1)]; tensor equal_36 = equal(x = concat_149, y = equal_36_y_0)[name = string("equal_36")]; tensor select_36 = select(a = shape_36_cast_fp16, b = concat_149, cond = equal_36)[name = string("select_36")]; tensor real_div_36 = real_div(x = select_36, y = shape_36_cast_fp16)[name = string("real_div_36")]; tensor hidden_states_857_cast_fp16 = tile(reps = real_div_36, x = var_3658_cast_fp16)[name = string("hidden_states_857_cast_fp16")]; tensor concat_150x = const()[name = string("concat_150x"), val = tensor([1, 16, -1, 128])]; tensor key_states_37_cast_fp16 = reshape(shape = concat_150x, x = hidden_states_857_cast_fp16)[name = string("key_states_37_cast_fp16")]; tensor hidden_states_859_cast_fp16 = transpose(perm = hidden_states_859_perm_0, x = var_3615_cast_fp16)[name = string("transpose_37")]; tensor var_3668_shape_cast_fp16 = shape(x = hidden_states_859_cast_fp16)[name = string("op_3668_shape_cast_fp16")]; int32 gather_264 = const()[name = string("gather_264"), val = int32(1)]; int32 gather_265 = const()[name = string("gather_265"), val = int32(8)]; int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)]; int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)]; bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)]; string var_3668_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3668_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_266_indices_0_to_uint16 = const()[name = string("gather_266_indices_0_to_uint16"), val = uint16(2)]; tensor var_3668_shape_cast_fp16_to_uint16 = cast(dtype = var_3668_shape_cast_fp16_to_uint16_dtype_0, x = var_3668_shape_cast_fp16)[name = string("cast_281")]; uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = gather_266_indices_0_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_3668_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")]; string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_267 = const()[name = string("gather_267"), val = int32(128)]; tensor var_3675_axes_0 = const()[name = string("op_3675_axes_0"), val = tensor([2])]; tensor var_3675_cast_fp16 = expand_dims(axes = var_3675_axes_0, x = hidden_states_859_cast_fp16)[name = string("op_3675_cast_fp16")]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_280")]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (gather_264, gather_265, var_3550, gather_266_cast_uint16_to_int32, gather_267))[name = string("concat_151")]; tensor shape_37_cast_fp16 = shape(x = var_3675_cast_fp16)[name = string("shape_37_cast_fp16")]; int32 equal_37_y_0 = const()[name = string("equal_37_y_0"), val = int32(-1)]; tensor equal_37 = equal(x = concat_151, y = equal_37_y_0)[name = string("equal_37")]; tensor select_37 = select(a = shape_37_cast_fp16, b = concat_151, cond = equal_37)[name = string("select_37")]; tensor real_div_37 = real_div(x = select_37, y = shape_37_cast_fp16)[name = string("real_div_37")]; tensor hidden_states_861_cast_fp16 = tile(reps = real_div_37, x = var_3675_cast_fp16)[name = string("hidden_states_861_cast_fp16")]; tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, 16, -1, 128])]; tensor value_states_37_cast_fp16 = reshape(shape = concat_152x, x = hidden_states_861_cast_fp16)[name = string("value_states_37_cast_fp16")]; bool var_3686_transpose_x_1 = const()[name = string("op_3686_transpose_x_1"), val = bool(false)]; bool var_3686_transpose_y_1 = const()[name = string("op_3686_transpose_y_1"), val = bool(true)]; tensor var_3686_cast_fp16 = matmul(transpose_x = var_3686_transpose_x_1, transpose_y = var_3686_transpose_y_1, x = query_37_cast_fp16, y = key_states_37_cast_fp16)[name = string("op_3686_cast_fp16")]; fp16 var_3687_to_fp16 = const()[name = string("op_3687_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_3686_cast_fp16, y = var_3687_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor input_219_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_219_cast_fp16")]; tensor var_3690_cast_fp16 = softmax(axis = var_3551, x = input_219_cast_fp16)[name = string("op_3690_cast_fp16")]; bool attn_output_73_transpose_x_0 = const()[name = string("attn_output_73_transpose_x_0"), val = bool(false)]; bool attn_output_73_transpose_y_0 = const()[name = string("attn_output_73_transpose_y_0"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_0, transpose_y = attn_output_73_transpose_y_0, x = var_3690_cast_fp16, y = value_states_37_cast_fp16)[name = string("attn_output_73_cast_fp16")]; tensor var_3694_perm_0 = const()[name = string("op_3694_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_153_axis_0 = const()[name = string("concat_153_axis_0"), val = int32(0)]; bool concat_153_interleave_0 = const()[name = string("concat_153_interleave_0"), val = bool(false)]; int32 gather_255_cast_uint16_to_int32 = cast(dtype = gather_255_cast_uint16_to_int32_dtype_0, x = gather_255_cast_uint16)[name = string("cast_284")]; tensor concat_153 = concat(axis = concat_153_axis_0, interleave = concat_153_interleave_0, values = (gather_254, gather_255_cast_uint16_to_int32, var_3551))[name = string("concat_153")]; tensor var_3694_cast_fp16 = transpose(perm = var_3694_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_36")]; tensor var_3697_cast_fp16 = reshape(shape = concat_153, x = var_3694_cast_fp16)[name = string("op_3697_cast_fp16")]; tensor layers_18_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_18_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885349120)))]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_o_proj_weight_to_fp16, x = var_3697_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor hidden_states_865_cast_fp16 = add(x = hidden_states_829_cast_fp16, y = linear_129_cast_fp16)[name = string("hidden_states_865_cast_fp16")]; fp16 var_3550_promoted_3_to_fp16 = const()[name = string("op_3550_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3704_cast_fp16 = pow(x = hidden_states_865_cast_fp16, y = var_3550_promoted_3_to_fp16)[name = string("op_3704_cast_fp16")]; tensor variance_151_axes_0 = const()[name = string("variance_151_axes_0"), val = tensor([-1])]; bool variance_151_keep_dims_0 = const()[name = string("variance_151_keep_dims_0"), val = bool(true)]; tensor variance_151_cast_fp16 = reduce_mean(axes = variance_151_axes_0, keep_dims = variance_151_keep_dims_0, x = var_3704_cast_fp16)[name = string("variance_151_cast_fp16")]; fp16 var_3707_to_fp16 = const()[name = string("op_3707_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3708_cast_fp16 = add(x = variance_151_cast_fp16, y = var_3707_to_fp16)[name = string("op_3708_cast_fp16")]; fp32 var_3709_epsilon_0 = const()[name = string("op_3709_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3709_cast_fp16 = rsqrt(epsilon = var_3709_epsilon_0, x = var_3708_cast_fp16)[name = string("op_3709_cast_fp16")]; tensor hidden_states_869_cast_fp16 = mul(x = hidden_states_865_cast_fp16, y = var_3709_cast_fp16)[name = string("hidden_states_869_cast_fp16")]; tensor layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889543488)))]; tensor input_225_cast_fp16 = mul(x = layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_869_cast_fp16)[name = string("input_225_cast_fp16")]; tensor layers_18_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_18_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889545600)))]; tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_gate_proj_weight_to_fp16, x = input_225_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_3721_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_3721_cast_fp16")]; tensor layers_18_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_18_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(895837120)))]; tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_up_proj_weight_to_fp16, x = input_225_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor input_229_cast_fp16 = mul(x = var_3721_cast_fp16, y = linear_131_cast_fp16)[name = string("input_229_cast_fp16")]; tensor layers_18_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_18_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902128640)))]; tensor linear_132_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_mlp_down_proj_weight_to_fp16, x = input_229_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor hidden_states_875_cast_fp16 = add(x = hidden_states_865_cast_fp16, y = linear_132_cast_fp16)[name = string("hidden_states_875_cast_fp16")]; int32 var_3738 = const()[name = string("op_3738"), val = int32(2)]; int32 var_3739 = const()[name = string("op_3739"), val = int32(-1)]; fp16 var_3738_promoted_to_fp16 = const()[name = string("op_3738_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3748_cast_fp16 = pow(x = hidden_states_875_cast_fp16, y = var_3738_promoted_to_fp16)[name = string("op_3748_cast_fp16")]; tensor variance_153_axes_0 = const()[name = string("variance_153_axes_0"), val = tensor([-1])]; bool variance_153_keep_dims_0 = const()[name = string("variance_153_keep_dims_0"), val = bool(true)]; tensor variance_153_cast_fp16 = reduce_mean(axes = variance_153_axes_0, keep_dims = variance_153_keep_dims_0, x = var_3748_cast_fp16)[name = string("variance_153_cast_fp16")]; fp16 var_3751_to_fp16 = const()[name = string("op_3751_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3752_cast_fp16 = add(x = variance_153_cast_fp16, y = var_3751_to_fp16)[name = string("op_3752_cast_fp16")]; fp32 var_3753_epsilon_0 = const()[name = string("op_3753_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3753_cast_fp16 = rsqrt(epsilon = var_3753_epsilon_0, x = var_3752_cast_fp16)[name = string("op_3753_cast_fp16")]; tensor hidden_states_879_cast_fp16 = mul(x = hidden_states_875_cast_fp16, y = var_3753_cast_fp16)[name = string("hidden_states_879_cast_fp16")]; tensor layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(908420160)))]; tensor hidden_states_883_cast_fp16 = mul(x = layers_19_input_layernorm_weight_to_fp16, y = hidden_states_879_cast_fp16)[name = string("hidden_states_883_cast_fp16")]; tensor var_3766_shape_cast_fp16 = shape(x = hidden_states_883_cast_fp16)[name = string("op_3766_shape_cast_fp16")]; int32 gather_268 = const()[name = string("gather_268"), val = int32(1)]; int32 gather_269_axis_0 = const()[name = string("gather_269_axis_0"), val = int32(0)]; int32 gather_269_batch_dims_0 = const()[name = string("gather_269_batch_dims_0"), val = int32(0)]; bool gather_269_validate_indices_0 = const()[name = string("gather_269_validate_indices_0"), val = bool(false)]; string var_3766_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3766_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_269_indices_0_to_uint16 = const()[name = string("gather_269_indices_0_to_uint16"), val = uint16(1)]; tensor var_3766_shape_cast_fp16_to_uint16 = cast(dtype = var_3766_shape_cast_fp16_to_uint16_dtype_0, x = var_3766_shape_cast_fp16)[name = string("cast_279")]; uint16 gather_269_cast_uint16 = gather(axis = gather_269_axis_0, batch_dims = gather_269_batch_dims_0, indices = gather_269_indices_0_to_uint16, validate_indices = gather_269_validate_indices_0, x = var_3766_shape_cast_fp16_to_uint16)[name = string("gather_269_cast_uint16")]; string gather_269_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_269_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_19_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_19_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(908422272)))]; tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_19_self_attn_q_proj_weight_to_fp16, x = hidden_states_883_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_885_cast_fp16 = reshape(shape = concat_154x, x = linear_133_cast_fp16)[name = string("hidden_states_885_cast_fp16")]; fp16 var_3738_promoted_1_to_fp16 = const()[name = string("op_3738_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_3774_cast_fp16 = pow(x = hidden_states_885_cast_fp16, y = var_3738_promoted_1_to_fp16)[name = string("op_3774_cast_fp16")]; tensor variance_155_axes_0 = const()[name = string("variance_155_axes_0"), val = tensor([-1])]; bool variance_155_keep_dims_0 = const()[name = string("variance_155_keep_dims_0"), val = bool(true)]; tensor variance_155_cast_fp16 = reduce_mean(axes = variance_155_axes_0, keep_dims = variance_155_keep_dims_0, x = var_3774_cast_fp16)[name = string("variance_155_cast_fp16")]; fp16 var_3777_to_fp16 = const()[name = string("op_3777_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3778_cast_fp16 = add(x = variance_155_cast_fp16, y = var_3777_to_fp16)[name = string("op_3778_cast_fp16")]; fp32 var_3779_epsilon_0 = const()[name = string("op_3779_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3779_cast_fp16 = rsqrt(epsilon = var_3779_epsilon_0, x = var_3778_cast_fp16)[name = string("op_3779_cast_fp16")]; tensor hidden_states_889_cast_fp16 = mul(x = hidden_states_885_cast_fp16, y = var_3779_cast_fp16)[name = string("hidden_states_889_cast_fp16")]; tensor layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912616640)))]; tensor var_3782_cast_fp16 = mul(x = layers_19_self_attn_q_norm_weight_to_fp16, y = hidden_states_889_cast_fp16)[name = string("op_3782_cast_fp16")]; tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_19_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_19_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(912616960)))]; tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_k_proj_weight_to_fp16, x = hidden_states_883_cast_fp16)[name = string("linear_134_cast_fp16")]; tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_893_cast_fp16 = reshape(shape = concat_155x, x = linear_134_cast_fp16)[name = string("hidden_states_893_cast_fp16")]; fp16 var_3738_promoted_2_to_fp16 = const()[name = string("op_3738_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_3790_cast_fp16 = pow(x = hidden_states_893_cast_fp16, y = var_3738_promoted_2_to_fp16)[name = string("op_3790_cast_fp16")]; tensor variance_157_axes_0 = const()[name = string("variance_157_axes_0"), val = tensor([-1])]; bool variance_157_keep_dims_0 = const()[name = string("variance_157_keep_dims_0"), val = bool(true)]; tensor variance_157_cast_fp16 = reduce_mean(axes = variance_157_axes_0, keep_dims = variance_157_keep_dims_0, x = var_3790_cast_fp16)[name = string("variance_157_cast_fp16")]; fp16 var_3793_to_fp16 = const()[name = string("op_3793_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3794_cast_fp16 = add(x = variance_157_cast_fp16, y = var_3793_to_fp16)[name = string("op_3794_cast_fp16")]; fp32 var_3795_epsilon_0 = const()[name = string("op_3795_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3795_cast_fp16 = rsqrt(epsilon = var_3795_epsilon_0, x = var_3794_cast_fp16)[name = string("op_3795_cast_fp16")]; tensor hidden_states_897_cast_fp16 = mul(x = hidden_states_893_cast_fp16, y = var_3795_cast_fp16)[name = string("hidden_states_897_cast_fp16")]; tensor layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914714176)))]; tensor var_3798_cast_fp16 = mul(x = layers_19_self_attn_k_norm_weight_to_fp16, y = hidden_states_897_cast_fp16)[name = string("op_3798_cast_fp16")]; tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_19_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_19_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914714496)))]; tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_v_proj_weight_to_fp16, x = hidden_states_883_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor concat_156x = const()[name = string("concat_156x"), val = tensor([1, -1, 8, 128])]; tensor var_3803_cast_fp16 = reshape(shape = concat_156x, x = linear_135_cast_fp16)[name = string("op_3803_cast_fp16")]; tensor hidden_states_905_perm_0 = const()[name = string("hidden_states_905_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_39_cast_fp16 = transpose(perm = q_39_perm_0, x = var_3782_cast_fp16)[name = string("transpose_35")]; tensor var_3807_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3807_cast_fp16")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3818_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3818_cast_fp16")]; bool var_3820_interleave_0 = const()[name = string("op_3820_interleave_0"), val = bool(false)]; tensor var_3820_cast_fp16 = concat(axis = var_3739, interleave = var_3820_interleave_0, values = (var_3818_cast_fp16, x1_77_cast_fp16))[name = string("op_3820_cast_fp16")]; tensor var_3821_cast_fp16 = mul(x = var_3820_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3821_cast_fp16")]; tensor query_39_cast_fp16 = add(x = var_3807_cast_fp16, y = var_3821_cast_fp16)[name = string("query_39_cast_fp16")]; tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = var_3798_cast_fp16)[name = string("transpose_34")]; tensor var_3823_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3823_cast_fp16")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3834_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3834_cast_fp16")]; bool var_3836_interleave_0 = const()[name = string("op_3836_interleave_0"), val = bool(false)]; tensor var_3836_cast_fp16 = concat(axis = var_3739, interleave = var_3836_interleave_0, values = (var_3834_cast_fp16, x1_79_cast_fp16))[name = string("op_3836_cast_fp16")]; tensor var_3837_cast_fp16 = mul(x = var_3836_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3837_cast_fp16")]; tensor hidden_states_901_cast_fp16 = add(x = var_3823_cast_fp16, y = var_3837_cast_fp16)[name = string("hidden_states_901_cast_fp16")]; tensor var_3839_shape_cast_fp16 = shape(x = hidden_states_901_cast_fp16)[name = string("op_3839_shape_cast_fp16")]; int32 gather_274 = const()[name = string("gather_274"), val = int32(1)]; int32 gather_275 = const()[name = string("gather_275"), val = int32(8)]; int32 gather_276_axis_0 = const()[name = string("gather_276_axis_0"), val = int32(0)]; int32 gather_276_batch_dims_0 = const()[name = string("gather_276_batch_dims_0"), val = int32(0)]; bool gather_276_validate_indices_0 = const()[name = string("gather_276_validate_indices_0"), val = bool(false)]; string var_3839_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3839_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_276_indices_0_to_uint16 = const()[name = string("gather_276_indices_0_to_uint16"), val = uint16(2)]; tensor var_3839_shape_cast_fp16_to_uint16 = cast(dtype = var_3839_shape_cast_fp16_to_uint16_dtype_0, x = var_3839_shape_cast_fp16)[name = string("cast_277")]; uint16 gather_276_cast_uint16 = gather(axis = gather_276_axis_0, batch_dims = gather_276_batch_dims_0, indices = gather_276_indices_0_to_uint16, validate_indices = gather_276_validate_indices_0, x = var_3839_shape_cast_fp16_to_uint16)[name = string("gather_276_cast_uint16")]; string gather_276_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_276_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_277 = const()[name = string("gather_277"), val = int32(128)]; tensor var_3846_axes_0 = const()[name = string("op_3846_axes_0"), val = tensor([2])]; tensor var_3846_cast_fp16 = expand_dims(axes = var_3846_axes_0, x = hidden_states_901_cast_fp16)[name = string("op_3846_cast_fp16")]; int32 concat_157_axis_0 = const()[name = string("concat_157_axis_0"), val = int32(0)]; bool concat_157_interleave_0 = const()[name = string("concat_157_interleave_0"), val = bool(false)]; int32 gather_276_cast_uint16_to_int32 = cast(dtype = gather_276_cast_uint16_to_int32_dtype_0, x = gather_276_cast_uint16)[name = string("cast_276")]; tensor concat_157 = concat(axis = concat_157_axis_0, interleave = concat_157_interleave_0, values = (gather_274, gather_275, var_3738, gather_276_cast_uint16_to_int32, gather_277))[name = string("concat_157")]; tensor shape_38_cast_fp16 = shape(x = var_3846_cast_fp16)[name = string("shape_38_cast_fp16")]; int32 equal_38_y_0 = const()[name = string("equal_38_y_0"), val = int32(-1)]; tensor equal_38 = equal(x = concat_157, y = equal_38_y_0)[name = string("equal_38")]; tensor select_38 = select(a = shape_38_cast_fp16, b = concat_157, cond = equal_38)[name = string("select_38")]; tensor real_div_38 = real_div(x = select_38, y = shape_38_cast_fp16)[name = string("real_div_38")]; tensor hidden_states_903_cast_fp16 = tile(reps = real_div_38, x = var_3846_cast_fp16)[name = string("hidden_states_903_cast_fp16")]; tensor concat_158x = const()[name = string("concat_158x"), val = tensor([1, 16, -1, 128])]; tensor key_states_39_cast_fp16 = reshape(shape = concat_158x, x = hidden_states_903_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor hidden_states_905_cast_fp16 = transpose(perm = hidden_states_905_perm_0, x = var_3803_cast_fp16)[name = string("transpose_33")]; tensor var_3856_shape_cast_fp16 = shape(x = hidden_states_905_cast_fp16)[name = string("op_3856_shape_cast_fp16")]; int32 gather_278 = const()[name = string("gather_278"), val = int32(1)]; int32 gather_279 = const()[name = string("gather_279"), val = int32(8)]; int32 gather_280_axis_0 = const()[name = string("gather_280_axis_0"), val = int32(0)]; int32 gather_280_batch_dims_0 = const()[name = string("gather_280_batch_dims_0"), val = int32(0)]; bool gather_280_validate_indices_0 = const()[name = string("gather_280_validate_indices_0"), val = bool(false)]; string var_3856_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3856_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_280_indices_0_to_uint16 = const()[name = string("gather_280_indices_0_to_uint16"), val = uint16(2)]; tensor var_3856_shape_cast_fp16_to_uint16 = cast(dtype = var_3856_shape_cast_fp16_to_uint16_dtype_0, x = var_3856_shape_cast_fp16)[name = string("cast_275")]; uint16 gather_280_cast_uint16 = gather(axis = gather_280_axis_0, batch_dims = gather_280_batch_dims_0, indices = gather_280_indices_0_to_uint16, validate_indices = gather_280_validate_indices_0, x = var_3856_shape_cast_fp16_to_uint16)[name = string("gather_280_cast_uint16")]; string gather_280_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_280_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_281 = const()[name = string("gather_281"), val = int32(128)]; tensor var_3863_axes_0 = const()[name = string("op_3863_axes_0"), val = tensor([2])]; tensor var_3863_cast_fp16 = expand_dims(axes = var_3863_axes_0, x = hidden_states_905_cast_fp16)[name = string("op_3863_cast_fp16")]; int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; int32 gather_280_cast_uint16_to_int32 = cast(dtype = gather_280_cast_uint16_to_int32_dtype_0, x = gather_280_cast_uint16)[name = string("cast_274")]; tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (gather_278, gather_279, var_3738, gather_280_cast_uint16_to_int32, gather_281))[name = string("concat_159")]; tensor shape_39_cast_fp16 = shape(x = var_3863_cast_fp16)[name = string("shape_39_cast_fp16")]; int32 equal_39_y_0 = const()[name = string("equal_39_y_0"), val = int32(-1)]; tensor equal_39 = equal(x = concat_159, y = equal_39_y_0)[name = string("equal_39")]; tensor select_39 = select(a = shape_39_cast_fp16, b = concat_159, cond = equal_39)[name = string("select_39")]; tensor real_div_39 = real_div(x = select_39, y = shape_39_cast_fp16)[name = string("real_div_39")]; tensor hidden_states_907_cast_fp16 = tile(reps = real_div_39, x = var_3863_cast_fp16)[name = string("hidden_states_907_cast_fp16")]; tensor concat_160x = const()[name = string("concat_160x"), val = tensor([1, 16, -1, 128])]; tensor value_states_39_cast_fp16 = reshape(shape = concat_160x, x = hidden_states_907_cast_fp16)[name = string("value_states_39_cast_fp16")]; bool var_3874_transpose_x_1 = const()[name = string("op_3874_transpose_x_1"), val = bool(false)]; bool var_3874_transpose_y_1 = const()[name = string("op_3874_transpose_y_1"), val = bool(true)]; tensor var_3874_cast_fp16 = matmul(transpose_x = var_3874_transpose_x_1, transpose_y = var_3874_transpose_y_1, x = query_39_cast_fp16, y = key_states_39_cast_fp16)[name = string("op_3874_cast_fp16")]; fp16 var_3875_to_fp16 = const()[name = string("op_3875_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_77_cast_fp16 = mul(x = var_3874_cast_fp16, y = var_3875_to_fp16)[name = string("attn_weights_77_cast_fp16")]; tensor input_231_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_231_cast_fp16")]; tensor var_3878_cast_fp16 = softmax(axis = var_3739, x = input_231_cast_fp16)[name = string("op_3878_cast_fp16")]; bool attn_output_77_transpose_x_0 = const()[name = string("attn_output_77_transpose_x_0"), val = bool(false)]; bool attn_output_77_transpose_y_0 = const()[name = string("attn_output_77_transpose_y_0"), val = bool(false)]; tensor attn_output_77_cast_fp16 = matmul(transpose_x = attn_output_77_transpose_x_0, transpose_y = attn_output_77_transpose_y_0, x = var_3878_cast_fp16, y = value_states_39_cast_fp16)[name = string("attn_output_77_cast_fp16")]; tensor var_3882_perm_0 = const()[name = string("op_3882_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_161_axis_0 = const()[name = string("concat_161_axis_0"), val = int32(0)]; bool concat_161_interleave_0 = const()[name = string("concat_161_interleave_0"), val = bool(false)]; int32 gather_269_cast_uint16_to_int32 = cast(dtype = gather_269_cast_uint16_to_int32_dtype_0, x = gather_269_cast_uint16)[name = string("cast_278")]; tensor concat_161 = concat(axis = concat_161_axis_0, interleave = concat_161_interleave_0, values = (gather_268, gather_269_cast_uint16_to_int32, var_3739))[name = string("concat_161")]; tensor var_3882_cast_fp16 = transpose(perm = var_3882_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_32")]; tensor var_3885_cast_fp16 = reshape(shape = concat_161, x = var_3882_cast_fp16)[name = string("op_3885_cast_fp16")]; tensor layers_19_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_19_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(916811712)))]; tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_o_proj_weight_to_fp16, x = var_3885_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor hidden_states_911_cast_fp16 = add(x = hidden_states_875_cast_fp16, y = linear_136_cast_fp16)[name = string("hidden_states_911_cast_fp16")]; fp16 var_3738_promoted_3_to_fp16 = const()[name = string("op_3738_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3892_cast_fp16 = pow(x = hidden_states_911_cast_fp16, y = var_3738_promoted_3_to_fp16)[name = string("op_3892_cast_fp16")]; tensor variance_159_axes_0 = const()[name = string("variance_159_axes_0"), val = tensor([-1])]; bool variance_159_keep_dims_0 = const()[name = string("variance_159_keep_dims_0"), val = bool(true)]; tensor variance_159_cast_fp16 = reduce_mean(axes = variance_159_axes_0, keep_dims = variance_159_keep_dims_0, x = var_3892_cast_fp16)[name = string("variance_159_cast_fp16")]; fp16 var_3895_to_fp16 = const()[name = string("op_3895_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3896_cast_fp16 = add(x = variance_159_cast_fp16, y = var_3895_to_fp16)[name = string("op_3896_cast_fp16")]; fp32 var_3897_epsilon_0 = const()[name = string("op_3897_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3897_cast_fp16 = rsqrt(epsilon = var_3897_epsilon_0, x = var_3896_cast_fp16)[name = string("op_3897_cast_fp16")]; tensor hidden_states_915_cast_fp16 = mul(x = hidden_states_911_cast_fp16, y = var_3897_cast_fp16)[name = string("hidden_states_915_cast_fp16")]; tensor layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(921006080)))]; tensor input_237_cast_fp16 = mul(x = layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_915_cast_fp16)[name = string("input_237_cast_fp16")]; tensor layers_19_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_19_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(921008192)))]; tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_gate_proj_weight_to_fp16, x = input_237_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_3909_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_3909_cast_fp16")]; tensor layers_19_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_19_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(927299712)))]; tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_up_proj_weight_to_fp16, x = input_237_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor input_241_cast_fp16 = mul(x = var_3909_cast_fp16, y = linear_138_cast_fp16)[name = string("input_241_cast_fp16")]; tensor layers_19_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_19_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933591232)))]; tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_mlp_down_proj_weight_to_fp16, x = input_241_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor hidden_states_921_cast_fp16 = add(x = hidden_states_911_cast_fp16, y = linear_139_cast_fp16)[name = string("hidden_states_921_cast_fp16")]; int32 var_3926 = const()[name = string("op_3926"), val = int32(2)]; int32 var_3927 = const()[name = string("op_3927"), val = int32(-1)]; fp16 var_3926_promoted_to_fp16 = const()[name = string("op_3926_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3936_cast_fp16 = pow(x = hidden_states_921_cast_fp16, y = var_3926_promoted_to_fp16)[name = string("op_3936_cast_fp16")]; tensor variance_161_axes_0 = const()[name = string("variance_161_axes_0"), val = tensor([-1])]; bool variance_161_keep_dims_0 = const()[name = string("variance_161_keep_dims_0"), val = bool(true)]; tensor variance_161_cast_fp16 = reduce_mean(axes = variance_161_axes_0, keep_dims = variance_161_keep_dims_0, x = var_3936_cast_fp16)[name = string("variance_161_cast_fp16")]; fp16 var_3939_to_fp16 = const()[name = string("op_3939_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3940_cast_fp16 = add(x = variance_161_cast_fp16, y = var_3939_to_fp16)[name = string("op_3940_cast_fp16")]; fp32 var_3941_epsilon_0 = const()[name = string("op_3941_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3941_cast_fp16 = rsqrt(epsilon = var_3941_epsilon_0, x = var_3940_cast_fp16)[name = string("op_3941_cast_fp16")]; tensor hidden_states_925_cast_fp16 = mul(x = hidden_states_921_cast_fp16, y = var_3941_cast_fp16)[name = string("hidden_states_925_cast_fp16")]; tensor layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939882752)))]; tensor hidden_states_929_cast_fp16 = mul(x = layers_20_input_layernorm_weight_to_fp16, y = hidden_states_925_cast_fp16)[name = string("hidden_states_929_cast_fp16")]; tensor var_3954_shape_cast_fp16 = shape(x = hidden_states_929_cast_fp16)[name = string("op_3954_shape_cast_fp16")]; int32 gather_282 = const()[name = string("gather_282"), val = int32(1)]; int32 gather_283_axis_0 = const()[name = string("gather_283_axis_0"), val = int32(0)]; int32 gather_283_batch_dims_0 = const()[name = string("gather_283_batch_dims_0"), val = int32(0)]; bool gather_283_validate_indices_0 = const()[name = string("gather_283_validate_indices_0"), val = bool(false)]; string var_3954_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3954_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_283_indices_0_to_uint16 = const()[name = string("gather_283_indices_0_to_uint16"), val = uint16(1)]; tensor var_3954_shape_cast_fp16_to_uint16 = cast(dtype = var_3954_shape_cast_fp16_to_uint16_dtype_0, x = var_3954_shape_cast_fp16)[name = string("cast_273")]; uint16 gather_283_cast_uint16 = gather(axis = gather_283_axis_0, batch_dims = gather_283_batch_dims_0, indices = gather_283_indices_0_to_uint16, validate_indices = gather_283_validate_indices_0, x = var_3954_shape_cast_fp16_to_uint16)[name = string("gather_283_cast_uint16")]; string gather_283_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_283_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_20_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_20_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939884864)))]; tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_20_self_attn_q_proj_weight_to_fp16, x = hidden_states_929_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor concat_162x = const()[name = string("concat_162x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_931_cast_fp16 = reshape(shape = concat_162x, x = linear_140_cast_fp16)[name = string("hidden_states_931_cast_fp16")]; fp16 var_3926_promoted_1_to_fp16 = const()[name = string("op_3926_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_3962_cast_fp16 = pow(x = hidden_states_931_cast_fp16, y = var_3926_promoted_1_to_fp16)[name = string("op_3962_cast_fp16")]; tensor variance_163_axes_0 = const()[name = string("variance_163_axes_0"), val = tensor([-1])]; bool variance_163_keep_dims_0 = const()[name = string("variance_163_keep_dims_0"), val = bool(true)]; tensor variance_163_cast_fp16 = reduce_mean(axes = variance_163_axes_0, keep_dims = variance_163_keep_dims_0, x = var_3962_cast_fp16)[name = string("variance_163_cast_fp16")]; fp16 var_3965_to_fp16 = const()[name = string("op_3965_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3966_cast_fp16 = add(x = variance_163_cast_fp16, y = var_3965_to_fp16)[name = string("op_3966_cast_fp16")]; fp32 var_3967_epsilon_0 = const()[name = string("op_3967_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3967_cast_fp16 = rsqrt(epsilon = var_3967_epsilon_0, x = var_3966_cast_fp16)[name = string("op_3967_cast_fp16")]; tensor hidden_states_935_cast_fp16 = mul(x = hidden_states_931_cast_fp16, y = var_3967_cast_fp16)[name = string("hidden_states_935_cast_fp16")]; tensor layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944079232)))]; tensor var_3970_cast_fp16 = mul(x = layers_20_self_attn_q_norm_weight_to_fp16, y = hidden_states_935_cast_fp16)[name = string("op_3970_cast_fp16")]; tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_20_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_20_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944079552)))]; tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_k_proj_weight_to_fp16, x = hidden_states_929_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor concat_163x = const()[name = string("concat_163x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_939_cast_fp16 = reshape(shape = concat_163x, x = linear_141_cast_fp16)[name = string("hidden_states_939_cast_fp16")]; fp16 var_3926_promoted_2_to_fp16 = const()[name = string("op_3926_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_3978_cast_fp16 = pow(x = hidden_states_939_cast_fp16, y = var_3926_promoted_2_to_fp16)[name = string("op_3978_cast_fp16")]; tensor variance_165_axes_0 = const()[name = string("variance_165_axes_0"), val = tensor([-1])]; bool variance_165_keep_dims_0 = const()[name = string("variance_165_keep_dims_0"), val = bool(true)]; tensor variance_165_cast_fp16 = reduce_mean(axes = variance_165_axes_0, keep_dims = variance_165_keep_dims_0, x = var_3978_cast_fp16)[name = string("variance_165_cast_fp16")]; fp16 var_3981_to_fp16 = const()[name = string("op_3981_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3982_cast_fp16 = add(x = variance_165_cast_fp16, y = var_3981_to_fp16)[name = string("op_3982_cast_fp16")]; fp32 var_3983_epsilon_0 = const()[name = string("op_3983_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3983_cast_fp16 = rsqrt(epsilon = var_3983_epsilon_0, x = var_3982_cast_fp16)[name = string("op_3983_cast_fp16")]; tensor hidden_states_943_cast_fp16 = mul(x = hidden_states_939_cast_fp16, y = var_3983_cast_fp16)[name = string("hidden_states_943_cast_fp16")]; tensor layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946176768)))]; tensor var_3986_cast_fp16 = mul(x = layers_20_self_attn_k_norm_weight_to_fp16, y = hidden_states_943_cast_fp16)[name = string("op_3986_cast_fp16")]; tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_20_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_20_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946177088)))]; tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_v_proj_weight_to_fp16, x = hidden_states_929_cast_fp16)[name = string("linear_142_cast_fp16")]; tensor concat_164x = const()[name = string("concat_164x"), val = tensor([1, -1, 8, 128])]; tensor var_3991_cast_fp16 = reshape(shape = concat_164x, x = linear_142_cast_fp16)[name = string("op_3991_cast_fp16")]; tensor hidden_states_951_perm_0 = const()[name = string("hidden_states_951_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_41_cast_fp16 = transpose(perm = q_41_perm_0, x = var_3970_cast_fp16)[name = string("transpose_31")]; tensor var_3995_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3995_cast_fp16")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4006_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_4006_cast_fp16")]; bool var_4008_interleave_0 = const()[name = string("op_4008_interleave_0"), val = bool(false)]; tensor var_4008_cast_fp16 = concat(axis = var_3927, interleave = var_4008_interleave_0, values = (var_4006_cast_fp16, x1_81_cast_fp16))[name = string("op_4008_cast_fp16")]; tensor var_4009_cast_fp16 = mul(x = var_4008_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4009_cast_fp16")]; tensor query_41_cast_fp16 = add(x = var_3995_cast_fp16, y = var_4009_cast_fp16)[name = string("query_41_cast_fp16")]; tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = var_3986_cast_fp16)[name = string("transpose_30")]; tensor var_4011_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4011_cast_fp16")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4022_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_4022_cast_fp16")]; bool var_4024_interleave_0 = const()[name = string("op_4024_interleave_0"), val = bool(false)]; tensor var_4024_cast_fp16 = concat(axis = var_3927, interleave = var_4024_interleave_0, values = (var_4022_cast_fp16, x1_83_cast_fp16))[name = string("op_4024_cast_fp16")]; tensor var_4025_cast_fp16 = mul(x = var_4024_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4025_cast_fp16")]; tensor hidden_states_947_cast_fp16 = add(x = var_4011_cast_fp16, y = var_4025_cast_fp16)[name = string("hidden_states_947_cast_fp16")]; tensor var_4027_shape_cast_fp16 = shape(x = hidden_states_947_cast_fp16)[name = string("op_4027_shape_cast_fp16")]; int32 gather_288 = const()[name = string("gather_288"), val = int32(1)]; int32 gather_289 = const()[name = string("gather_289"), val = int32(8)]; int32 gather_290_axis_0 = const()[name = string("gather_290_axis_0"), val = int32(0)]; int32 gather_290_batch_dims_0 = const()[name = string("gather_290_batch_dims_0"), val = int32(0)]; bool gather_290_validate_indices_0 = const()[name = string("gather_290_validate_indices_0"), val = bool(false)]; string var_4027_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4027_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_290_indices_0_to_uint16 = const()[name = string("gather_290_indices_0_to_uint16"), val = uint16(2)]; tensor var_4027_shape_cast_fp16_to_uint16 = cast(dtype = var_4027_shape_cast_fp16_to_uint16_dtype_0, x = var_4027_shape_cast_fp16)[name = string("cast_271")]; uint16 gather_290_cast_uint16 = gather(axis = gather_290_axis_0, batch_dims = gather_290_batch_dims_0, indices = gather_290_indices_0_to_uint16, validate_indices = gather_290_validate_indices_0, x = var_4027_shape_cast_fp16_to_uint16)[name = string("gather_290_cast_uint16")]; string gather_290_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_290_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_291 = const()[name = string("gather_291"), val = int32(128)]; tensor var_4034_axes_0 = const()[name = string("op_4034_axes_0"), val = tensor([2])]; tensor var_4034_cast_fp16 = expand_dims(axes = var_4034_axes_0, x = hidden_states_947_cast_fp16)[name = string("op_4034_cast_fp16")]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; int32 gather_290_cast_uint16_to_int32 = cast(dtype = gather_290_cast_uint16_to_int32_dtype_0, x = gather_290_cast_uint16)[name = string("cast_270")]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_288, gather_289, var_3926, gather_290_cast_uint16_to_int32, gather_291))[name = string("concat_165")]; tensor shape_40_cast_fp16 = shape(x = var_4034_cast_fp16)[name = string("shape_40_cast_fp16")]; int32 equal_40_y_0 = const()[name = string("equal_40_y_0"), val = int32(-1)]; tensor equal_40 = equal(x = concat_165, y = equal_40_y_0)[name = string("equal_40")]; tensor select_40 = select(a = shape_40_cast_fp16, b = concat_165, cond = equal_40)[name = string("select_40")]; tensor real_div_40 = real_div(x = select_40, y = shape_40_cast_fp16)[name = string("real_div_40")]; tensor hidden_states_949_cast_fp16 = tile(reps = real_div_40, x = var_4034_cast_fp16)[name = string("hidden_states_949_cast_fp16")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, 16, -1, 128])]; tensor key_states_41_cast_fp16 = reshape(shape = concat_166x, x = hidden_states_949_cast_fp16)[name = string("key_states_41_cast_fp16")]; tensor hidden_states_951_cast_fp16 = transpose(perm = hidden_states_951_perm_0, x = var_3991_cast_fp16)[name = string("transpose_29")]; tensor var_4044_shape_cast_fp16 = shape(x = hidden_states_951_cast_fp16)[name = string("op_4044_shape_cast_fp16")]; int32 gather_292 = const()[name = string("gather_292"), val = int32(1)]; int32 gather_293 = const()[name = string("gather_293"), val = int32(8)]; int32 gather_294_axis_0 = const()[name = string("gather_294_axis_0"), val = int32(0)]; int32 gather_294_batch_dims_0 = const()[name = string("gather_294_batch_dims_0"), val = int32(0)]; bool gather_294_validate_indices_0 = const()[name = string("gather_294_validate_indices_0"), val = bool(false)]; string var_4044_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4044_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_294_indices_0_to_uint16 = const()[name = string("gather_294_indices_0_to_uint16"), val = uint16(2)]; tensor var_4044_shape_cast_fp16_to_uint16 = cast(dtype = var_4044_shape_cast_fp16_to_uint16_dtype_0, x = var_4044_shape_cast_fp16)[name = string("cast_269")]; uint16 gather_294_cast_uint16 = gather(axis = gather_294_axis_0, batch_dims = gather_294_batch_dims_0, indices = gather_294_indices_0_to_uint16, validate_indices = gather_294_validate_indices_0, x = var_4044_shape_cast_fp16_to_uint16)[name = string("gather_294_cast_uint16")]; string gather_294_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_294_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_295 = const()[name = string("gather_295"), val = int32(128)]; tensor var_4051_axes_0 = const()[name = string("op_4051_axes_0"), val = tensor([2])]; tensor var_4051_cast_fp16 = expand_dims(axes = var_4051_axes_0, x = hidden_states_951_cast_fp16)[name = string("op_4051_cast_fp16")]; int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; int32 gather_294_cast_uint16_to_int32 = cast(dtype = gather_294_cast_uint16_to_int32_dtype_0, x = gather_294_cast_uint16)[name = string("cast_268")]; tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_292, gather_293, var_3926, gather_294_cast_uint16_to_int32, gather_295))[name = string("concat_167")]; tensor shape_41_cast_fp16 = shape(x = var_4051_cast_fp16)[name = string("shape_41_cast_fp16")]; int32 equal_41_y_0 = const()[name = string("equal_41_y_0"), val = int32(-1)]; tensor equal_41 = equal(x = concat_167, y = equal_41_y_0)[name = string("equal_41")]; tensor select_41 = select(a = shape_41_cast_fp16, b = concat_167, cond = equal_41)[name = string("select_41")]; tensor real_div_41 = real_div(x = select_41, y = shape_41_cast_fp16)[name = string("real_div_41")]; tensor hidden_states_953_cast_fp16 = tile(reps = real_div_41, x = var_4051_cast_fp16)[name = string("hidden_states_953_cast_fp16")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, 16, -1, 128])]; tensor value_states_41_cast_fp16 = reshape(shape = concat_168x, x = hidden_states_953_cast_fp16)[name = string("value_states_41_cast_fp16")]; bool var_4062_transpose_x_1 = const()[name = string("op_4062_transpose_x_1"), val = bool(false)]; bool var_4062_transpose_y_1 = const()[name = string("op_4062_transpose_y_1"), val = bool(true)]; tensor var_4062_cast_fp16 = matmul(transpose_x = var_4062_transpose_x_1, transpose_y = var_4062_transpose_y_1, x = query_41_cast_fp16, y = key_states_41_cast_fp16)[name = string("op_4062_cast_fp16")]; fp16 var_4063_to_fp16 = const()[name = string("op_4063_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_81_cast_fp16 = mul(x = var_4062_cast_fp16, y = var_4063_to_fp16)[name = string("attn_weights_81_cast_fp16")]; tensor input_243_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_243_cast_fp16")]; tensor var_4066_cast_fp16 = softmax(axis = var_3927, x = input_243_cast_fp16)[name = string("op_4066_cast_fp16")]; bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = var_4066_cast_fp16, y = value_states_41_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_4070_perm_0 = const()[name = string("op_4070_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; int32 gather_283_cast_uint16_to_int32 = cast(dtype = gather_283_cast_uint16_to_int32_dtype_0, x = gather_283_cast_uint16)[name = string("cast_272")]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_282, gather_283_cast_uint16_to_int32, var_3927))[name = string("concat_169")]; tensor var_4070_cast_fp16 = transpose(perm = var_4070_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_28")]; tensor var_4073_cast_fp16 = reshape(shape = concat_169, x = var_4070_cast_fp16)[name = string("op_4073_cast_fp16")]; tensor layers_20_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_20_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948274304)))]; tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_o_proj_weight_to_fp16, x = var_4073_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor hidden_states_957_cast_fp16 = add(x = hidden_states_921_cast_fp16, y = linear_143_cast_fp16)[name = string("hidden_states_957_cast_fp16")]; fp16 var_3926_promoted_3_to_fp16 = const()[name = string("op_3926_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4080_cast_fp16 = pow(x = hidden_states_957_cast_fp16, y = var_3926_promoted_3_to_fp16)[name = string("op_4080_cast_fp16")]; tensor variance_167_axes_0 = const()[name = string("variance_167_axes_0"), val = tensor([-1])]; bool variance_167_keep_dims_0 = const()[name = string("variance_167_keep_dims_0"), val = bool(true)]; tensor variance_167_cast_fp16 = reduce_mean(axes = variance_167_axes_0, keep_dims = variance_167_keep_dims_0, x = var_4080_cast_fp16)[name = string("variance_167_cast_fp16")]; fp16 var_4083_to_fp16 = const()[name = string("op_4083_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4084_cast_fp16 = add(x = variance_167_cast_fp16, y = var_4083_to_fp16)[name = string("op_4084_cast_fp16")]; fp32 var_4085_epsilon_0 = const()[name = string("op_4085_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4085_cast_fp16 = rsqrt(epsilon = var_4085_epsilon_0, x = var_4084_cast_fp16)[name = string("op_4085_cast_fp16")]; tensor hidden_states_961_cast_fp16 = mul(x = hidden_states_957_cast_fp16, y = var_4085_cast_fp16)[name = string("hidden_states_961_cast_fp16")]; tensor layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(952468672)))]; tensor input_249_cast_fp16 = mul(x = layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_961_cast_fp16)[name = string("input_249_cast_fp16")]; tensor layers_20_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_20_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(952470784)))]; tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_gate_proj_weight_to_fp16, x = input_249_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_4097_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_4097_cast_fp16")]; tensor layers_20_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_20_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958762304)))]; tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_up_proj_weight_to_fp16, x = input_249_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor input_253_cast_fp16 = mul(x = var_4097_cast_fp16, y = linear_145_cast_fp16)[name = string("input_253_cast_fp16")]; tensor layers_20_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_20_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(965053824)))]; tensor linear_146_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_mlp_down_proj_weight_to_fp16, x = input_253_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor hidden_states_967_cast_fp16 = add(x = hidden_states_957_cast_fp16, y = linear_146_cast_fp16)[name = string("hidden_states_967_cast_fp16")]; int32 var_4114 = const()[name = string("op_4114"), val = int32(2)]; int32 var_4115 = const()[name = string("op_4115"), val = int32(-1)]; fp16 var_4114_promoted_to_fp16 = const()[name = string("op_4114_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4124_cast_fp16 = pow(x = hidden_states_967_cast_fp16, y = var_4114_promoted_to_fp16)[name = string("op_4124_cast_fp16")]; tensor variance_169_axes_0 = const()[name = string("variance_169_axes_0"), val = tensor([-1])]; bool variance_169_keep_dims_0 = const()[name = string("variance_169_keep_dims_0"), val = bool(true)]; tensor variance_169_cast_fp16 = reduce_mean(axes = variance_169_axes_0, keep_dims = variance_169_keep_dims_0, x = var_4124_cast_fp16)[name = string("variance_169_cast_fp16")]; fp16 var_4127_to_fp16 = const()[name = string("op_4127_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4128_cast_fp16 = add(x = variance_169_cast_fp16, y = var_4127_to_fp16)[name = string("op_4128_cast_fp16")]; fp32 var_4129_epsilon_0 = const()[name = string("op_4129_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4129_cast_fp16 = rsqrt(epsilon = var_4129_epsilon_0, x = var_4128_cast_fp16)[name = string("op_4129_cast_fp16")]; tensor hidden_states_971_cast_fp16 = mul(x = hidden_states_967_cast_fp16, y = var_4129_cast_fp16)[name = string("hidden_states_971_cast_fp16")]; tensor layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971345344)))]; tensor hidden_states_975_cast_fp16 = mul(x = layers_21_input_layernorm_weight_to_fp16, y = hidden_states_971_cast_fp16)[name = string("hidden_states_975_cast_fp16")]; tensor var_4142_shape_cast_fp16 = shape(x = hidden_states_975_cast_fp16)[name = string("op_4142_shape_cast_fp16")]; int32 gather_296 = const()[name = string("gather_296"), val = int32(1)]; int32 gather_297_axis_0 = const()[name = string("gather_297_axis_0"), val = int32(0)]; int32 gather_297_batch_dims_0 = const()[name = string("gather_297_batch_dims_0"), val = int32(0)]; bool gather_297_validate_indices_0 = const()[name = string("gather_297_validate_indices_0"), val = bool(false)]; string var_4142_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4142_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_297_indices_0_to_uint16 = const()[name = string("gather_297_indices_0_to_uint16"), val = uint16(1)]; tensor var_4142_shape_cast_fp16_to_uint16 = cast(dtype = var_4142_shape_cast_fp16_to_uint16_dtype_0, x = var_4142_shape_cast_fp16)[name = string("cast_267")]; uint16 gather_297_cast_uint16 = gather(axis = gather_297_axis_0, batch_dims = gather_297_batch_dims_0, indices = gather_297_indices_0_to_uint16, validate_indices = gather_297_validate_indices_0, x = var_4142_shape_cast_fp16_to_uint16)[name = string("gather_297_cast_uint16")]; string gather_297_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_297_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_21_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_21_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971347456)))]; tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_21_self_attn_q_proj_weight_to_fp16, x = hidden_states_975_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor concat_170x = const()[name = string("concat_170x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_977_cast_fp16 = reshape(shape = concat_170x, x = linear_147_cast_fp16)[name = string("hidden_states_977_cast_fp16")]; fp16 var_4114_promoted_1_to_fp16 = const()[name = string("op_4114_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_4150_cast_fp16 = pow(x = hidden_states_977_cast_fp16, y = var_4114_promoted_1_to_fp16)[name = string("op_4150_cast_fp16")]; tensor variance_171_axes_0 = const()[name = string("variance_171_axes_0"), val = tensor([-1])]; bool variance_171_keep_dims_0 = const()[name = string("variance_171_keep_dims_0"), val = bool(true)]; tensor variance_171_cast_fp16 = reduce_mean(axes = variance_171_axes_0, keep_dims = variance_171_keep_dims_0, x = var_4150_cast_fp16)[name = string("variance_171_cast_fp16")]; fp16 var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4154_cast_fp16 = add(x = variance_171_cast_fp16, y = var_4153_to_fp16)[name = string("op_4154_cast_fp16")]; fp32 var_4155_epsilon_0 = const()[name = string("op_4155_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4155_cast_fp16 = rsqrt(epsilon = var_4155_epsilon_0, x = var_4154_cast_fp16)[name = string("op_4155_cast_fp16")]; tensor hidden_states_981_cast_fp16 = mul(x = hidden_states_977_cast_fp16, y = var_4155_cast_fp16)[name = string("hidden_states_981_cast_fp16")]; tensor layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975541824)))]; tensor var_4158_cast_fp16 = mul(x = layers_21_self_attn_q_norm_weight_to_fp16, y = hidden_states_981_cast_fp16)[name = string("op_4158_cast_fp16")]; tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_21_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_21_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975542144)))]; tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_k_proj_weight_to_fp16, x = hidden_states_975_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_985_cast_fp16 = reshape(shape = concat_171x, x = linear_148_cast_fp16)[name = string("hidden_states_985_cast_fp16")]; fp16 var_4114_promoted_2_to_fp16 = const()[name = string("op_4114_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_4166_cast_fp16 = pow(x = hidden_states_985_cast_fp16, y = var_4114_promoted_2_to_fp16)[name = string("op_4166_cast_fp16")]; tensor variance_173_axes_0 = const()[name = string("variance_173_axes_0"), val = tensor([-1])]; bool variance_173_keep_dims_0 = const()[name = string("variance_173_keep_dims_0"), val = bool(true)]; tensor variance_173_cast_fp16 = reduce_mean(axes = variance_173_axes_0, keep_dims = variance_173_keep_dims_0, x = var_4166_cast_fp16)[name = string("variance_173_cast_fp16")]; fp16 var_4169_to_fp16 = const()[name = string("op_4169_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4170_cast_fp16 = add(x = variance_173_cast_fp16, y = var_4169_to_fp16)[name = string("op_4170_cast_fp16")]; fp32 var_4171_epsilon_0 = const()[name = string("op_4171_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4171_cast_fp16 = rsqrt(epsilon = var_4171_epsilon_0, x = var_4170_cast_fp16)[name = string("op_4171_cast_fp16")]; tensor hidden_states_989_cast_fp16 = mul(x = hidden_states_985_cast_fp16, y = var_4171_cast_fp16)[name = string("hidden_states_989_cast_fp16")]; tensor layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977639360)))]; tensor var_4174_cast_fp16 = mul(x = layers_21_self_attn_k_norm_weight_to_fp16, y = hidden_states_989_cast_fp16)[name = string("op_4174_cast_fp16")]; tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_21_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_21_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977639680)))]; tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_v_proj_weight_to_fp16, x = hidden_states_975_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 8, 128])]; tensor var_4179_cast_fp16 = reshape(shape = concat_172x, x = linear_149_cast_fp16)[name = string("op_4179_cast_fp16")]; tensor hidden_states_997_perm_0 = const()[name = string("hidden_states_997_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_43_cast_fp16 = transpose(perm = q_43_perm_0, x = var_4158_cast_fp16)[name = string("transpose_27")]; tensor var_4183_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4183_cast_fp16")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4194_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_4194_cast_fp16")]; bool var_4196_interleave_0 = const()[name = string("op_4196_interleave_0"), val = bool(false)]; tensor var_4196_cast_fp16 = concat(axis = var_4115, interleave = var_4196_interleave_0, values = (var_4194_cast_fp16, x1_85_cast_fp16))[name = string("op_4196_cast_fp16")]; tensor var_4197_cast_fp16 = mul(x = var_4196_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4197_cast_fp16")]; tensor query_43_cast_fp16 = add(x = var_4183_cast_fp16, y = var_4197_cast_fp16)[name = string("query_43_cast_fp16")]; tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = var_4174_cast_fp16)[name = string("transpose_26")]; tensor var_4199_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4210_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_4210_cast_fp16")]; bool var_4212_interleave_0 = const()[name = string("op_4212_interleave_0"), val = bool(false)]; tensor var_4212_cast_fp16 = concat(axis = var_4115, interleave = var_4212_interleave_0, values = (var_4210_cast_fp16, x1_87_cast_fp16))[name = string("op_4212_cast_fp16")]; tensor var_4213_cast_fp16 = mul(x = var_4212_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4213_cast_fp16")]; tensor hidden_states_993_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4213_cast_fp16)[name = string("hidden_states_993_cast_fp16")]; tensor var_4215_shape_cast_fp16 = shape(x = hidden_states_993_cast_fp16)[name = string("op_4215_shape_cast_fp16")]; int32 gather_302 = const()[name = string("gather_302"), val = int32(1)]; int32 gather_303 = const()[name = string("gather_303"), val = int32(8)]; int32 gather_304_axis_0 = const()[name = string("gather_304_axis_0"), val = int32(0)]; int32 gather_304_batch_dims_0 = const()[name = string("gather_304_batch_dims_0"), val = int32(0)]; bool gather_304_validate_indices_0 = const()[name = string("gather_304_validate_indices_0"), val = bool(false)]; string var_4215_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4215_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_304_indices_0_to_uint16 = const()[name = string("gather_304_indices_0_to_uint16"), val = uint16(2)]; tensor var_4215_shape_cast_fp16_to_uint16 = cast(dtype = var_4215_shape_cast_fp16_to_uint16_dtype_0, x = var_4215_shape_cast_fp16)[name = string("cast_265")]; uint16 gather_304_cast_uint16 = gather(axis = gather_304_axis_0, batch_dims = gather_304_batch_dims_0, indices = gather_304_indices_0_to_uint16, validate_indices = gather_304_validate_indices_0, x = var_4215_shape_cast_fp16_to_uint16)[name = string("gather_304_cast_uint16")]; string gather_304_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_304_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_305 = const()[name = string("gather_305"), val = int32(128)]; tensor var_4222_axes_0 = const()[name = string("op_4222_axes_0"), val = tensor([2])]; tensor var_4222_cast_fp16 = expand_dims(axes = var_4222_axes_0, x = hidden_states_993_cast_fp16)[name = string("op_4222_cast_fp16")]; int32 concat_173_axis_0 = const()[name = string("concat_173_axis_0"), val = int32(0)]; bool concat_173_interleave_0 = const()[name = string("concat_173_interleave_0"), val = bool(false)]; int32 gather_304_cast_uint16_to_int32 = cast(dtype = gather_304_cast_uint16_to_int32_dtype_0, x = gather_304_cast_uint16)[name = string("cast_264")]; tensor concat_173 = concat(axis = concat_173_axis_0, interleave = concat_173_interleave_0, values = (gather_302, gather_303, var_4114, gather_304_cast_uint16_to_int32, gather_305))[name = string("concat_173")]; tensor shape_42_cast_fp16 = shape(x = var_4222_cast_fp16)[name = string("shape_42_cast_fp16")]; int32 equal_42_y_0 = const()[name = string("equal_42_y_0"), val = int32(-1)]; tensor equal_42 = equal(x = concat_173, y = equal_42_y_0)[name = string("equal_42")]; tensor select_42 = select(a = shape_42_cast_fp16, b = concat_173, cond = equal_42)[name = string("select_42")]; tensor real_div_42 = real_div(x = select_42, y = shape_42_cast_fp16)[name = string("real_div_42")]; tensor hidden_states_995_cast_fp16 = tile(reps = real_div_42, x = var_4222_cast_fp16)[name = string("hidden_states_995_cast_fp16")]; tensor concat_174x = const()[name = string("concat_174x"), val = tensor([1, 16, -1, 128])]; tensor key_states_43_cast_fp16 = reshape(shape = concat_174x, x = hidden_states_995_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor hidden_states_997_cast_fp16 = transpose(perm = hidden_states_997_perm_0, x = var_4179_cast_fp16)[name = string("transpose_25")]; tensor var_4232_shape_cast_fp16 = shape(x = hidden_states_997_cast_fp16)[name = string("op_4232_shape_cast_fp16")]; int32 gather_306 = const()[name = string("gather_306"), val = int32(1)]; int32 gather_307 = const()[name = string("gather_307"), val = int32(8)]; int32 gather_308_axis_0 = const()[name = string("gather_308_axis_0"), val = int32(0)]; int32 gather_308_batch_dims_0 = const()[name = string("gather_308_batch_dims_0"), val = int32(0)]; bool gather_308_validate_indices_0 = const()[name = string("gather_308_validate_indices_0"), val = bool(false)]; string var_4232_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4232_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_308_indices_0_to_uint16 = const()[name = string("gather_308_indices_0_to_uint16"), val = uint16(2)]; tensor var_4232_shape_cast_fp16_to_uint16 = cast(dtype = var_4232_shape_cast_fp16_to_uint16_dtype_0, x = var_4232_shape_cast_fp16)[name = string("cast_263")]; uint16 gather_308_cast_uint16 = gather(axis = gather_308_axis_0, batch_dims = gather_308_batch_dims_0, indices = gather_308_indices_0_to_uint16, validate_indices = gather_308_validate_indices_0, x = var_4232_shape_cast_fp16_to_uint16)[name = string("gather_308_cast_uint16")]; string gather_308_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_308_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_309 = const()[name = string("gather_309"), val = int32(128)]; tensor var_4239_axes_0 = const()[name = string("op_4239_axes_0"), val = tensor([2])]; tensor var_4239_cast_fp16 = expand_dims(axes = var_4239_axes_0, x = hidden_states_997_cast_fp16)[name = string("op_4239_cast_fp16")]; int32 concat_175_axis_0 = const()[name = string("concat_175_axis_0"), val = int32(0)]; bool concat_175_interleave_0 = const()[name = string("concat_175_interleave_0"), val = bool(false)]; int32 gather_308_cast_uint16_to_int32 = cast(dtype = gather_308_cast_uint16_to_int32_dtype_0, x = gather_308_cast_uint16)[name = string("cast_262")]; tensor concat_175 = concat(axis = concat_175_axis_0, interleave = concat_175_interleave_0, values = (gather_306, gather_307, var_4114, gather_308_cast_uint16_to_int32, gather_309))[name = string("concat_175")]; tensor shape_43_cast_fp16 = shape(x = var_4239_cast_fp16)[name = string("shape_43_cast_fp16")]; int32 equal_43_y_0 = const()[name = string("equal_43_y_0"), val = int32(-1)]; tensor equal_43 = equal(x = concat_175, y = equal_43_y_0)[name = string("equal_43")]; tensor select_43 = select(a = shape_43_cast_fp16, b = concat_175, cond = equal_43)[name = string("select_43")]; tensor real_div_43 = real_div(x = select_43, y = shape_43_cast_fp16)[name = string("real_div_43")]; tensor hidden_states_999_cast_fp16 = tile(reps = real_div_43, x = var_4239_cast_fp16)[name = string("hidden_states_999_cast_fp16")]; tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, 16, -1, 128])]; tensor value_states_43_cast_fp16 = reshape(shape = concat_176x, x = hidden_states_999_cast_fp16)[name = string("value_states_43_cast_fp16")]; bool var_4250_transpose_x_1 = const()[name = string("op_4250_transpose_x_1"), val = bool(false)]; bool var_4250_transpose_y_1 = const()[name = string("op_4250_transpose_y_1"), val = bool(true)]; tensor var_4250_cast_fp16 = matmul(transpose_x = var_4250_transpose_x_1, transpose_y = var_4250_transpose_y_1, x = query_43_cast_fp16, y = key_states_43_cast_fp16)[name = string("op_4250_cast_fp16")]; fp16 var_4251_to_fp16 = const()[name = string("op_4251_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_4250_cast_fp16, y = var_4251_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor input_255_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_255_cast_fp16")]; tensor var_4254_cast_fp16 = softmax(axis = var_4115, x = input_255_cast_fp16)[name = string("op_4254_cast_fp16")]; bool attn_output_85_transpose_x_0 = const()[name = string("attn_output_85_transpose_x_0"), val = bool(false)]; bool attn_output_85_transpose_y_0 = const()[name = string("attn_output_85_transpose_y_0"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_0, transpose_y = attn_output_85_transpose_y_0, x = var_4254_cast_fp16, y = value_states_43_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_4258_perm_0 = const()[name = string("op_4258_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_177_axis_0 = const()[name = string("concat_177_axis_0"), val = int32(0)]; bool concat_177_interleave_0 = const()[name = string("concat_177_interleave_0"), val = bool(false)]; int32 gather_297_cast_uint16_to_int32 = cast(dtype = gather_297_cast_uint16_to_int32_dtype_0, x = gather_297_cast_uint16)[name = string("cast_266")]; tensor concat_177 = concat(axis = concat_177_axis_0, interleave = concat_177_interleave_0, values = (gather_296, gather_297_cast_uint16_to_int32, var_4115))[name = string("concat_177")]; tensor var_4258_cast_fp16 = transpose(perm = var_4258_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_24")]; tensor var_4261_cast_fp16 = reshape(shape = concat_177, x = var_4258_cast_fp16)[name = string("op_4261_cast_fp16")]; tensor layers_21_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_21_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(979736896)))]; tensor linear_150_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_o_proj_weight_to_fp16, x = var_4261_cast_fp16)[name = string("linear_150_cast_fp16")]; tensor hidden_states_1003_cast_fp16 = add(x = hidden_states_967_cast_fp16, y = linear_150_cast_fp16)[name = string("hidden_states_1003_cast_fp16")]; fp16 var_4114_promoted_3_to_fp16 = const()[name = string("op_4114_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4268_cast_fp16 = pow(x = hidden_states_1003_cast_fp16, y = var_4114_promoted_3_to_fp16)[name = string("op_4268_cast_fp16")]; tensor variance_175_axes_0 = const()[name = string("variance_175_axes_0"), val = tensor([-1])]; bool variance_175_keep_dims_0 = const()[name = string("variance_175_keep_dims_0"), val = bool(true)]; tensor variance_175_cast_fp16 = reduce_mean(axes = variance_175_axes_0, keep_dims = variance_175_keep_dims_0, x = var_4268_cast_fp16)[name = string("variance_175_cast_fp16")]; fp16 var_4271_to_fp16 = const()[name = string("op_4271_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4272_cast_fp16 = add(x = variance_175_cast_fp16, y = var_4271_to_fp16)[name = string("op_4272_cast_fp16")]; fp32 var_4273_epsilon_0 = const()[name = string("op_4273_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4273_cast_fp16 = rsqrt(epsilon = var_4273_epsilon_0, x = var_4272_cast_fp16)[name = string("op_4273_cast_fp16")]; tensor hidden_states_1007_cast_fp16 = mul(x = hidden_states_1003_cast_fp16, y = var_4273_cast_fp16)[name = string("hidden_states_1007_cast_fp16")]; tensor layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(983931264)))]; tensor input_261_cast_fp16 = mul(x = layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_1007_cast_fp16)[name = string("input_261_cast_fp16")]; tensor layers_21_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_21_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(983933376)))]; tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_gate_proj_weight_to_fp16, x = input_261_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor var_4285_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_4285_cast_fp16")]; tensor layers_21_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_21_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(990224896)))]; tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_up_proj_weight_to_fp16, x = input_261_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor input_265_cast_fp16 = mul(x = var_4285_cast_fp16, y = linear_152_cast_fp16)[name = string("input_265_cast_fp16")]; tensor layers_21_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_21_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(996516416)))]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_mlp_down_proj_weight_to_fp16, x = input_265_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor hidden_states_1013_cast_fp16 = add(x = hidden_states_1003_cast_fp16, y = linear_153_cast_fp16)[name = string("hidden_states_1013_cast_fp16")]; int32 var_4302 = const()[name = string("op_4302"), val = int32(2)]; int32 var_4303 = const()[name = string("op_4303"), val = int32(-1)]; fp16 var_4302_promoted_to_fp16 = const()[name = string("op_4302_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4312_cast_fp16 = pow(x = hidden_states_1013_cast_fp16, y = var_4302_promoted_to_fp16)[name = string("op_4312_cast_fp16")]; tensor variance_177_axes_0 = const()[name = string("variance_177_axes_0"), val = tensor([-1])]; bool variance_177_keep_dims_0 = const()[name = string("variance_177_keep_dims_0"), val = bool(true)]; tensor variance_177_cast_fp16 = reduce_mean(axes = variance_177_axes_0, keep_dims = variance_177_keep_dims_0, x = var_4312_cast_fp16)[name = string("variance_177_cast_fp16")]; fp16 var_4315_to_fp16 = const()[name = string("op_4315_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4316_cast_fp16 = add(x = variance_177_cast_fp16, y = var_4315_to_fp16)[name = string("op_4316_cast_fp16")]; fp32 var_4317_epsilon_0 = const()[name = string("op_4317_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4317_cast_fp16 = rsqrt(epsilon = var_4317_epsilon_0, x = var_4316_cast_fp16)[name = string("op_4317_cast_fp16")]; tensor hidden_states_1017_cast_fp16 = mul(x = hidden_states_1013_cast_fp16, y = var_4317_cast_fp16)[name = string("hidden_states_1017_cast_fp16")]; tensor layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1002807936)))]; tensor hidden_states_1021_cast_fp16 = mul(x = layers_22_input_layernorm_weight_to_fp16, y = hidden_states_1017_cast_fp16)[name = string("hidden_states_1021_cast_fp16")]; tensor var_4330_shape_cast_fp16 = shape(x = hidden_states_1021_cast_fp16)[name = string("op_4330_shape_cast_fp16")]; int32 gather_310 = const()[name = string("gather_310"), val = int32(1)]; int32 gather_311_axis_0 = const()[name = string("gather_311_axis_0"), val = int32(0)]; int32 gather_311_batch_dims_0 = const()[name = string("gather_311_batch_dims_0"), val = int32(0)]; bool gather_311_validate_indices_0 = const()[name = string("gather_311_validate_indices_0"), val = bool(false)]; string var_4330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_311_indices_0_to_uint16 = const()[name = string("gather_311_indices_0_to_uint16"), val = uint16(1)]; tensor var_4330_shape_cast_fp16_to_uint16 = cast(dtype = var_4330_shape_cast_fp16_to_uint16_dtype_0, x = var_4330_shape_cast_fp16)[name = string("cast_261")]; uint16 gather_311_cast_uint16 = gather(axis = gather_311_axis_0, batch_dims = gather_311_batch_dims_0, indices = gather_311_indices_0_to_uint16, validate_indices = gather_311_validate_indices_0, x = var_4330_shape_cast_fp16_to_uint16)[name = string("gather_311_cast_uint16")]; string gather_311_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_311_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_22_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_22_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1002810048)))]; tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_22_self_attn_q_proj_weight_to_fp16, x = hidden_states_1021_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor concat_178x = const()[name = string("concat_178x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1023_cast_fp16 = reshape(shape = concat_178x, x = linear_154_cast_fp16)[name = string("hidden_states_1023_cast_fp16")]; fp16 var_4302_promoted_1_to_fp16 = const()[name = string("op_4302_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_4338_cast_fp16 = pow(x = hidden_states_1023_cast_fp16, y = var_4302_promoted_1_to_fp16)[name = string("op_4338_cast_fp16")]; tensor variance_179_axes_0 = const()[name = string("variance_179_axes_0"), val = tensor([-1])]; bool variance_179_keep_dims_0 = const()[name = string("variance_179_keep_dims_0"), val = bool(true)]; tensor variance_179_cast_fp16 = reduce_mean(axes = variance_179_axes_0, keep_dims = variance_179_keep_dims_0, x = var_4338_cast_fp16)[name = string("variance_179_cast_fp16")]; fp16 var_4341_to_fp16 = const()[name = string("op_4341_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4342_cast_fp16 = add(x = variance_179_cast_fp16, y = var_4341_to_fp16)[name = string("op_4342_cast_fp16")]; fp32 var_4343_epsilon_0 = const()[name = string("op_4343_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4343_cast_fp16 = rsqrt(epsilon = var_4343_epsilon_0, x = var_4342_cast_fp16)[name = string("op_4343_cast_fp16")]; tensor hidden_states_1027_cast_fp16 = mul(x = hidden_states_1023_cast_fp16, y = var_4343_cast_fp16)[name = string("hidden_states_1027_cast_fp16")]; tensor layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1007004416)))]; tensor var_4346_cast_fp16 = mul(x = layers_22_self_attn_q_norm_weight_to_fp16, y = hidden_states_1027_cast_fp16)[name = string("op_4346_cast_fp16")]; tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_22_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_22_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1007004736)))]; tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_k_proj_weight_to_fp16, x = hidden_states_1021_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor concat_179x = const()[name = string("concat_179x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1031_cast_fp16 = reshape(shape = concat_179x, x = linear_155_cast_fp16)[name = string("hidden_states_1031_cast_fp16")]; fp16 var_4302_promoted_2_to_fp16 = const()[name = string("op_4302_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_4354_cast_fp16 = pow(x = hidden_states_1031_cast_fp16, y = var_4302_promoted_2_to_fp16)[name = string("op_4354_cast_fp16")]; tensor variance_181_axes_0 = const()[name = string("variance_181_axes_0"), val = tensor([-1])]; bool variance_181_keep_dims_0 = const()[name = string("variance_181_keep_dims_0"), val = bool(true)]; tensor variance_181_cast_fp16 = reduce_mean(axes = variance_181_axes_0, keep_dims = variance_181_keep_dims_0, x = var_4354_cast_fp16)[name = string("variance_181_cast_fp16")]; fp16 var_4357_to_fp16 = const()[name = string("op_4357_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4358_cast_fp16 = add(x = variance_181_cast_fp16, y = var_4357_to_fp16)[name = string("op_4358_cast_fp16")]; fp32 var_4359_epsilon_0 = const()[name = string("op_4359_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4359_cast_fp16 = rsqrt(epsilon = var_4359_epsilon_0, x = var_4358_cast_fp16)[name = string("op_4359_cast_fp16")]; tensor hidden_states_1035_cast_fp16 = mul(x = hidden_states_1031_cast_fp16, y = var_4359_cast_fp16)[name = string("hidden_states_1035_cast_fp16")]; tensor layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1009101952)))]; tensor var_4362_cast_fp16 = mul(x = layers_22_self_attn_k_norm_weight_to_fp16, y = hidden_states_1035_cast_fp16)[name = string("op_4362_cast_fp16")]; tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_22_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_22_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1009102272)))]; tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_v_proj_weight_to_fp16, x = hidden_states_1021_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor concat_180x = const()[name = string("concat_180x"), val = tensor([1, -1, 8, 128])]; tensor var_4367_cast_fp16 = reshape(shape = concat_180x, x = linear_156_cast_fp16)[name = string("op_4367_cast_fp16")]; tensor hidden_states_1043_perm_0 = const()[name = string("hidden_states_1043_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = var_4346_cast_fp16)[name = string("transpose_23")]; tensor var_4371_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4371_cast_fp16")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4382_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_4382_cast_fp16")]; bool var_4384_interleave_0 = const()[name = string("op_4384_interleave_0"), val = bool(false)]; tensor var_4384_cast_fp16 = concat(axis = var_4303, interleave = var_4384_interleave_0, values = (var_4382_cast_fp16, x1_89_cast_fp16))[name = string("op_4384_cast_fp16")]; tensor var_4385_cast_fp16 = mul(x = var_4384_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4385_cast_fp16")]; tensor query_45_cast_fp16 = add(x = var_4371_cast_fp16, y = var_4385_cast_fp16)[name = string("query_45_cast_fp16")]; tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = var_4362_cast_fp16)[name = string("transpose_22")]; tensor var_4387_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4387_cast_fp16")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4398_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_4398_cast_fp16")]; bool var_4400_interleave_0 = const()[name = string("op_4400_interleave_0"), val = bool(false)]; tensor var_4400_cast_fp16 = concat(axis = var_4303, interleave = var_4400_interleave_0, values = (var_4398_cast_fp16, x1_91_cast_fp16))[name = string("op_4400_cast_fp16")]; tensor var_4401_cast_fp16 = mul(x = var_4400_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4401_cast_fp16")]; tensor hidden_states_1039_cast_fp16 = add(x = var_4387_cast_fp16, y = var_4401_cast_fp16)[name = string("hidden_states_1039_cast_fp16")]; tensor var_4403_shape_cast_fp16 = shape(x = hidden_states_1039_cast_fp16)[name = string("op_4403_shape_cast_fp16")]; int32 gather_316 = const()[name = string("gather_316"), val = int32(1)]; int32 gather_317 = const()[name = string("gather_317"), val = int32(8)]; int32 gather_318_axis_0 = const()[name = string("gather_318_axis_0"), val = int32(0)]; int32 gather_318_batch_dims_0 = const()[name = string("gather_318_batch_dims_0"), val = int32(0)]; bool gather_318_validate_indices_0 = const()[name = string("gather_318_validate_indices_0"), val = bool(false)]; string var_4403_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4403_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_318_indices_0_to_uint16 = const()[name = string("gather_318_indices_0_to_uint16"), val = uint16(2)]; tensor var_4403_shape_cast_fp16_to_uint16 = cast(dtype = var_4403_shape_cast_fp16_to_uint16_dtype_0, x = var_4403_shape_cast_fp16)[name = string("cast_259")]; uint16 gather_318_cast_uint16 = gather(axis = gather_318_axis_0, batch_dims = gather_318_batch_dims_0, indices = gather_318_indices_0_to_uint16, validate_indices = gather_318_validate_indices_0, x = var_4403_shape_cast_fp16_to_uint16)[name = string("gather_318_cast_uint16")]; string gather_318_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_318_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_319 = const()[name = string("gather_319"), val = int32(128)]; tensor var_4410_axes_0 = const()[name = string("op_4410_axes_0"), val = tensor([2])]; tensor var_4410_cast_fp16 = expand_dims(axes = var_4410_axes_0, x = hidden_states_1039_cast_fp16)[name = string("op_4410_cast_fp16")]; int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; int32 gather_318_cast_uint16_to_int32 = cast(dtype = gather_318_cast_uint16_to_int32_dtype_0, x = gather_318_cast_uint16)[name = string("cast_258")]; tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (gather_316, gather_317, var_4302, gather_318_cast_uint16_to_int32, gather_319))[name = string("concat_181")]; tensor shape_44_cast_fp16 = shape(x = var_4410_cast_fp16)[name = string("shape_44_cast_fp16")]; int32 equal_44_y_0 = const()[name = string("equal_44_y_0"), val = int32(-1)]; tensor equal_44 = equal(x = concat_181, y = equal_44_y_0)[name = string("equal_44")]; tensor select_44 = select(a = shape_44_cast_fp16, b = concat_181, cond = equal_44)[name = string("select_44")]; tensor real_div_44 = real_div(x = select_44, y = shape_44_cast_fp16)[name = string("real_div_44")]; tensor hidden_states_1041_cast_fp16 = tile(reps = real_div_44, x = var_4410_cast_fp16)[name = string("hidden_states_1041_cast_fp16")]; tensor concat_182x = const()[name = string("concat_182x"), val = tensor([1, 16, -1, 128])]; tensor key_states_45_cast_fp16 = reshape(shape = concat_182x, x = hidden_states_1041_cast_fp16)[name = string("key_states_45_cast_fp16")]; tensor hidden_states_1043_cast_fp16 = transpose(perm = hidden_states_1043_perm_0, x = var_4367_cast_fp16)[name = string("transpose_21")]; tensor var_4420_shape_cast_fp16 = shape(x = hidden_states_1043_cast_fp16)[name = string("op_4420_shape_cast_fp16")]; int32 gather_320 = const()[name = string("gather_320"), val = int32(1)]; int32 gather_321 = const()[name = string("gather_321"), val = int32(8)]; int32 gather_322_axis_0 = const()[name = string("gather_322_axis_0"), val = int32(0)]; int32 gather_322_batch_dims_0 = const()[name = string("gather_322_batch_dims_0"), val = int32(0)]; bool gather_322_validate_indices_0 = const()[name = string("gather_322_validate_indices_0"), val = bool(false)]; string var_4420_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4420_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_322_indices_0_to_uint16 = const()[name = string("gather_322_indices_0_to_uint16"), val = uint16(2)]; tensor var_4420_shape_cast_fp16_to_uint16 = cast(dtype = var_4420_shape_cast_fp16_to_uint16_dtype_0, x = var_4420_shape_cast_fp16)[name = string("cast_257")]; uint16 gather_322_cast_uint16 = gather(axis = gather_322_axis_0, batch_dims = gather_322_batch_dims_0, indices = gather_322_indices_0_to_uint16, validate_indices = gather_322_validate_indices_0, x = var_4420_shape_cast_fp16_to_uint16)[name = string("gather_322_cast_uint16")]; string gather_322_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_322_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_323 = const()[name = string("gather_323"), val = int32(128)]; tensor var_4427_axes_0 = const()[name = string("op_4427_axes_0"), val = tensor([2])]; tensor var_4427_cast_fp16 = expand_dims(axes = var_4427_axes_0, x = hidden_states_1043_cast_fp16)[name = string("op_4427_cast_fp16")]; int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; int32 gather_322_cast_uint16_to_int32 = cast(dtype = gather_322_cast_uint16_to_int32_dtype_0, x = gather_322_cast_uint16)[name = string("cast_256")]; tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (gather_320, gather_321, var_4302, gather_322_cast_uint16_to_int32, gather_323))[name = string("concat_183")]; tensor shape_45_cast_fp16 = shape(x = var_4427_cast_fp16)[name = string("shape_45_cast_fp16")]; int32 equal_45_y_0 = const()[name = string("equal_45_y_0"), val = int32(-1)]; tensor equal_45 = equal(x = concat_183, y = equal_45_y_0)[name = string("equal_45")]; tensor select_45 = select(a = shape_45_cast_fp16, b = concat_183, cond = equal_45)[name = string("select_45")]; tensor real_div_45 = real_div(x = select_45, y = shape_45_cast_fp16)[name = string("real_div_45")]; tensor hidden_states_1045_cast_fp16 = tile(reps = real_div_45, x = var_4427_cast_fp16)[name = string("hidden_states_1045_cast_fp16")]; tensor concat_184x = const()[name = string("concat_184x"), val = tensor([1, 16, -1, 128])]; tensor value_states_45_cast_fp16 = reshape(shape = concat_184x, x = hidden_states_1045_cast_fp16)[name = string("value_states_45_cast_fp16")]; bool var_4438_transpose_x_1 = const()[name = string("op_4438_transpose_x_1"), val = bool(false)]; bool var_4438_transpose_y_1 = const()[name = string("op_4438_transpose_y_1"), val = bool(true)]; tensor var_4438_cast_fp16 = matmul(transpose_x = var_4438_transpose_x_1, transpose_y = var_4438_transpose_y_1, x = query_45_cast_fp16, y = key_states_45_cast_fp16)[name = string("op_4438_cast_fp16")]; fp16 var_4439_to_fp16 = const()[name = string("op_4439_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_89_cast_fp16 = mul(x = var_4438_cast_fp16, y = var_4439_to_fp16)[name = string("attn_weights_89_cast_fp16")]; tensor input_267_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_267_cast_fp16")]; tensor var_4442_cast_fp16 = softmax(axis = var_4303, x = input_267_cast_fp16)[name = string("op_4442_cast_fp16")]; bool attn_output_89_transpose_x_0 = const()[name = string("attn_output_89_transpose_x_0"), val = bool(false)]; bool attn_output_89_transpose_y_0 = const()[name = string("attn_output_89_transpose_y_0"), val = bool(false)]; tensor attn_output_89_cast_fp16 = matmul(transpose_x = attn_output_89_transpose_x_0, transpose_y = attn_output_89_transpose_y_0, x = var_4442_cast_fp16, y = value_states_45_cast_fp16)[name = string("attn_output_89_cast_fp16")]; tensor var_4446_perm_0 = const()[name = string("op_4446_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_185_axis_0 = const()[name = string("concat_185_axis_0"), val = int32(0)]; bool concat_185_interleave_0 = const()[name = string("concat_185_interleave_0"), val = bool(false)]; int32 gather_311_cast_uint16_to_int32 = cast(dtype = gather_311_cast_uint16_to_int32_dtype_0, x = gather_311_cast_uint16)[name = string("cast_260")]; tensor concat_185 = concat(axis = concat_185_axis_0, interleave = concat_185_interleave_0, values = (gather_310, gather_311_cast_uint16_to_int32, var_4303))[name = string("concat_185")]; tensor var_4446_cast_fp16 = transpose(perm = var_4446_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_20")]; tensor var_4449_cast_fp16 = reshape(shape = concat_185, x = var_4446_cast_fp16)[name = string("op_4449_cast_fp16")]; tensor layers_22_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_22_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1011199488)))]; tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_o_proj_weight_to_fp16, x = var_4449_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor hidden_states_1049_cast_fp16 = add(x = hidden_states_1013_cast_fp16, y = linear_157_cast_fp16)[name = string("hidden_states_1049_cast_fp16")]; fp16 var_4302_promoted_3_to_fp16 = const()[name = string("op_4302_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4456_cast_fp16 = pow(x = hidden_states_1049_cast_fp16, y = var_4302_promoted_3_to_fp16)[name = string("op_4456_cast_fp16")]; tensor variance_183_axes_0 = const()[name = string("variance_183_axes_0"), val = tensor([-1])]; bool variance_183_keep_dims_0 = const()[name = string("variance_183_keep_dims_0"), val = bool(true)]; tensor variance_183_cast_fp16 = reduce_mean(axes = variance_183_axes_0, keep_dims = variance_183_keep_dims_0, x = var_4456_cast_fp16)[name = string("variance_183_cast_fp16")]; fp16 var_4459_to_fp16 = const()[name = string("op_4459_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4460_cast_fp16 = add(x = variance_183_cast_fp16, y = var_4459_to_fp16)[name = string("op_4460_cast_fp16")]; fp32 var_4461_epsilon_0 = const()[name = string("op_4461_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4461_cast_fp16 = rsqrt(epsilon = var_4461_epsilon_0, x = var_4460_cast_fp16)[name = string("op_4461_cast_fp16")]; tensor hidden_states_1053_cast_fp16 = mul(x = hidden_states_1049_cast_fp16, y = var_4461_cast_fp16)[name = string("hidden_states_1053_cast_fp16")]; tensor layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1015393856)))]; tensor input_273_cast_fp16 = mul(x = layers_22_post_attention_layernorm_weight_to_fp16, y = hidden_states_1053_cast_fp16)[name = string("input_273_cast_fp16")]; tensor layers_22_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_22_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1015395968)))]; tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_gate_proj_weight_to_fp16, x = input_273_cast_fp16)[name = string("linear_158_cast_fp16")]; tensor var_4473_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_4473_cast_fp16")]; tensor layers_22_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_22_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1021687488)))]; tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_up_proj_weight_to_fp16, x = input_273_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor input_277_cast_fp16 = mul(x = var_4473_cast_fp16, y = linear_159_cast_fp16)[name = string("input_277_cast_fp16")]; tensor layers_22_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_22_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027979008)))]; tensor linear_160_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_mlp_down_proj_weight_to_fp16, x = input_277_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor hidden_states_1059_cast_fp16 = add(x = hidden_states_1049_cast_fp16, y = linear_160_cast_fp16)[name = string("hidden_states_1059_cast_fp16")]; int32 var_4490 = const()[name = string("op_4490"), val = int32(2)]; int32 var_4491 = const()[name = string("op_4491"), val = int32(-1)]; fp16 var_4490_promoted_to_fp16 = const()[name = string("op_4490_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4500_cast_fp16 = pow(x = hidden_states_1059_cast_fp16, y = var_4490_promoted_to_fp16)[name = string("op_4500_cast_fp16")]; tensor variance_185_axes_0 = const()[name = string("variance_185_axes_0"), val = tensor([-1])]; bool variance_185_keep_dims_0 = const()[name = string("variance_185_keep_dims_0"), val = bool(true)]; tensor variance_185_cast_fp16 = reduce_mean(axes = variance_185_axes_0, keep_dims = variance_185_keep_dims_0, x = var_4500_cast_fp16)[name = string("variance_185_cast_fp16")]; fp16 var_4503_to_fp16 = const()[name = string("op_4503_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4504_cast_fp16 = add(x = variance_185_cast_fp16, y = var_4503_to_fp16)[name = string("op_4504_cast_fp16")]; fp32 var_4505_epsilon_0 = const()[name = string("op_4505_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4505_cast_fp16 = rsqrt(epsilon = var_4505_epsilon_0, x = var_4504_cast_fp16)[name = string("op_4505_cast_fp16")]; tensor hidden_states_1063_cast_fp16 = mul(x = hidden_states_1059_cast_fp16, y = var_4505_cast_fp16)[name = string("hidden_states_1063_cast_fp16")]; tensor layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1034270528)))]; tensor hidden_states_1067_cast_fp16 = mul(x = layers_23_input_layernorm_weight_to_fp16, y = hidden_states_1063_cast_fp16)[name = string("hidden_states_1067_cast_fp16")]; tensor var_4518_shape_cast_fp16 = shape(x = hidden_states_1067_cast_fp16)[name = string("op_4518_shape_cast_fp16")]; int32 gather_324 = const()[name = string("gather_324"), val = int32(1)]; int32 gather_325_axis_0 = const()[name = string("gather_325_axis_0"), val = int32(0)]; int32 gather_325_batch_dims_0 = const()[name = string("gather_325_batch_dims_0"), val = int32(0)]; bool gather_325_validate_indices_0 = const()[name = string("gather_325_validate_indices_0"), val = bool(false)]; string var_4518_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4518_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_325_indices_0_to_uint16 = const()[name = string("gather_325_indices_0_to_uint16"), val = uint16(1)]; tensor var_4518_shape_cast_fp16_to_uint16 = cast(dtype = var_4518_shape_cast_fp16_to_uint16_dtype_0, x = var_4518_shape_cast_fp16)[name = string("cast_255")]; uint16 gather_325_cast_uint16 = gather(axis = gather_325_axis_0, batch_dims = gather_325_batch_dims_0, indices = gather_325_indices_0_to_uint16, validate_indices = gather_325_validate_indices_0, x = var_4518_shape_cast_fp16_to_uint16)[name = string("gather_325_cast_uint16")]; string gather_325_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_325_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_23_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_23_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1034272640)))]; tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_23_self_attn_q_proj_weight_to_fp16, x = hidden_states_1067_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor concat_186x = const()[name = string("concat_186x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1069_cast_fp16 = reshape(shape = concat_186x, x = linear_161_cast_fp16)[name = string("hidden_states_1069_cast_fp16")]; fp16 var_4490_promoted_1_to_fp16 = const()[name = string("op_4490_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_4526_cast_fp16 = pow(x = hidden_states_1069_cast_fp16, y = var_4490_promoted_1_to_fp16)[name = string("op_4526_cast_fp16")]; tensor variance_187_axes_0 = const()[name = string("variance_187_axes_0"), val = tensor([-1])]; bool variance_187_keep_dims_0 = const()[name = string("variance_187_keep_dims_0"), val = bool(true)]; tensor variance_187_cast_fp16 = reduce_mean(axes = variance_187_axes_0, keep_dims = variance_187_keep_dims_0, x = var_4526_cast_fp16)[name = string("variance_187_cast_fp16")]; fp16 var_4529_to_fp16 = const()[name = string("op_4529_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4530_cast_fp16 = add(x = variance_187_cast_fp16, y = var_4529_to_fp16)[name = string("op_4530_cast_fp16")]; fp32 var_4531_epsilon_0 = const()[name = string("op_4531_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4531_cast_fp16 = rsqrt(epsilon = var_4531_epsilon_0, x = var_4530_cast_fp16)[name = string("op_4531_cast_fp16")]; tensor hidden_states_1073_cast_fp16 = mul(x = hidden_states_1069_cast_fp16, y = var_4531_cast_fp16)[name = string("hidden_states_1073_cast_fp16")]; tensor layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1038467008)))]; tensor var_4534_cast_fp16 = mul(x = layers_23_self_attn_q_norm_weight_to_fp16, y = hidden_states_1073_cast_fp16)[name = string("op_4534_cast_fp16")]; tensor q_47_perm_0 = const()[name = string("q_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_23_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_23_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1038467328)))]; tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_k_proj_weight_to_fp16, x = hidden_states_1067_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor concat_187x = const()[name = string("concat_187x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1077_cast_fp16 = reshape(shape = concat_187x, x = linear_162_cast_fp16)[name = string("hidden_states_1077_cast_fp16")]; fp16 var_4490_promoted_2_to_fp16 = const()[name = string("op_4490_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_4542_cast_fp16 = pow(x = hidden_states_1077_cast_fp16, y = var_4490_promoted_2_to_fp16)[name = string("op_4542_cast_fp16")]; tensor variance_189_axes_0 = const()[name = string("variance_189_axes_0"), val = tensor([-1])]; bool variance_189_keep_dims_0 = const()[name = string("variance_189_keep_dims_0"), val = bool(true)]; tensor variance_189_cast_fp16 = reduce_mean(axes = variance_189_axes_0, keep_dims = variance_189_keep_dims_0, x = var_4542_cast_fp16)[name = string("variance_189_cast_fp16")]; fp16 var_4545_to_fp16 = const()[name = string("op_4545_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4546_cast_fp16 = add(x = variance_189_cast_fp16, y = var_4545_to_fp16)[name = string("op_4546_cast_fp16")]; fp32 var_4547_epsilon_0 = const()[name = string("op_4547_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4547_cast_fp16 = rsqrt(epsilon = var_4547_epsilon_0, x = var_4546_cast_fp16)[name = string("op_4547_cast_fp16")]; tensor hidden_states_1081_cast_fp16 = mul(x = hidden_states_1077_cast_fp16, y = var_4547_cast_fp16)[name = string("hidden_states_1081_cast_fp16")]; tensor layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1040564544)))]; tensor var_4550_cast_fp16 = mul(x = layers_23_self_attn_k_norm_weight_to_fp16, y = hidden_states_1081_cast_fp16)[name = string("op_4550_cast_fp16")]; tensor k_47_perm_0 = const()[name = string("k_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_23_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_23_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1040564864)))]; tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_v_proj_weight_to_fp16, x = hidden_states_1067_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 8, 128])]; tensor var_4555_cast_fp16 = reshape(shape = concat_188x, x = linear_163_cast_fp16)[name = string("op_4555_cast_fp16")]; tensor hidden_states_1089_perm_0 = const()[name = string("hidden_states_1089_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_47_cast_fp16 = transpose(perm = q_47_perm_0, x = var_4534_cast_fp16)[name = string("transpose_19")]; tensor var_4559_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4559_cast_fp16")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4570_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_4570_cast_fp16")]; bool var_4572_interleave_0 = const()[name = string("op_4572_interleave_0"), val = bool(false)]; tensor var_4572_cast_fp16 = concat(axis = var_4491, interleave = var_4572_interleave_0, values = (var_4570_cast_fp16, x1_93_cast_fp16))[name = string("op_4572_cast_fp16")]; tensor var_4573_cast_fp16 = mul(x = var_4572_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4573_cast_fp16")]; tensor query_47_cast_fp16 = add(x = var_4559_cast_fp16, y = var_4573_cast_fp16)[name = string("query_47_cast_fp16")]; tensor k_47_cast_fp16 = transpose(perm = k_47_perm_0, x = var_4550_cast_fp16)[name = string("transpose_18")]; tensor var_4575_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4575_cast_fp16")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4586_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_4586_cast_fp16")]; bool var_4588_interleave_0 = const()[name = string("op_4588_interleave_0"), val = bool(false)]; tensor var_4588_cast_fp16 = concat(axis = var_4491, interleave = var_4588_interleave_0, values = (var_4586_cast_fp16, x1_95_cast_fp16))[name = string("op_4588_cast_fp16")]; tensor var_4589_cast_fp16 = mul(x = var_4588_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4589_cast_fp16")]; tensor hidden_states_1085_cast_fp16 = add(x = var_4575_cast_fp16, y = var_4589_cast_fp16)[name = string("hidden_states_1085_cast_fp16")]; tensor var_4591_shape_cast_fp16 = shape(x = hidden_states_1085_cast_fp16)[name = string("op_4591_shape_cast_fp16")]; int32 gather_330 = const()[name = string("gather_330"), val = int32(1)]; int32 gather_331 = const()[name = string("gather_331"), val = int32(8)]; int32 gather_332_axis_0 = const()[name = string("gather_332_axis_0"), val = int32(0)]; int32 gather_332_batch_dims_0 = const()[name = string("gather_332_batch_dims_0"), val = int32(0)]; bool gather_332_validate_indices_0 = const()[name = string("gather_332_validate_indices_0"), val = bool(false)]; string var_4591_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4591_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_332_indices_0_to_uint16 = const()[name = string("gather_332_indices_0_to_uint16"), val = uint16(2)]; tensor var_4591_shape_cast_fp16_to_uint16 = cast(dtype = var_4591_shape_cast_fp16_to_uint16_dtype_0, x = var_4591_shape_cast_fp16)[name = string("cast_253")]; uint16 gather_332_cast_uint16 = gather(axis = gather_332_axis_0, batch_dims = gather_332_batch_dims_0, indices = gather_332_indices_0_to_uint16, validate_indices = gather_332_validate_indices_0, x = var_4591_shape_cast_fp16_to_uint16)[name = string("gather_332_cast_uint16")]; string gather_332_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_332_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_333 = const()[name = string("gather_333"), val = int32(128)]; tensor var_4598_axes_0 = const()[name = string("op_4598_axes_0"), val = tensor([2])]; tensor var_4598_cast_fp16 = expand_dims(axes = var_4598_axes_0, x = hidden_states_1085_cast_fp16)[name = string("op_4598_cast_fp16")]; int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; int32 gather_332_cast_uint16_to_int32 = cast(dtype = gather_332_cast_uint16_to_int32_dtype_0, x = gather_332_cast_uint16)[name = string("cast_252")]; tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (gather_330, gather_331, var_4490, gather_332_cast_uint16_to_int32, gather_333))[name = string("concat_189")]; tensor shape_46_cast_fp16 = shape(x = var_4598_cast_fp16)[name = string("shape_46_cast_fp16")]; int32 equal_46_y_0 = const()[name = string("equal_46_y_0"), val = int32(-1)]; tensor equal_46 = equal(x = concat_189, y = equal_46_y_0)[name = string("equal_46")]; tensor select_46 = select(a = shape_46_cast_fp16, b = concat_189, cond = equal_46)[name = string("select_46")]; tensor real_div_46 = real_div(x = select_46, y = shape_46_cast_fp16)[name = string("real_div_46")]; tensor hidden_states_1087_cast_fp16 = tile(reps = real_div_46, x = var_4598_cast_fp16)[name = string("hidden_states_1087_cast_fp16")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, 16, -1, 128])]; tensor key_states_47_cast_fp16 = reshape(shape = concat_190x, x = hidden_states_1087_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor hidden_states_1089_cast_fp16 = transpose(perm = hidden_states_1089_perm_0, x = var_4555_cast_fp16)[name = string("transpose_17")]; tensor var_4608_shape_cast_fp16 = shape(x = hidden_states_1089_cast_fp16)[name = string("op_4608_shape_cast_fp16")]; int32 gather_334 = const()[name = string("gather_334"), val = int32(1)]; int32 gather_335 = const()[name = string("gather_335"), val = int32(8)]; int32 gather_336_axis_0 = const()[name = string("gather_336_axis_0"), val = int32(0)]; int32 gather_336_batch_dims_0 = const()[name = string("gather_336_batch_dims_0"), val = int32(0)]; bool gather_336_validate_indices_0 = const()[name = string("gather_336_validate_indices_0"), val = bool(false)]; string var_4608_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4608_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_336_indices_0_to_uint16 = const()[name = string("gather_336_indices_0_to_uint16"), val = uint16(2)]; tensor var_4608_shape_cast_fp16_to_uint16 = cast(dtype = var_4608_shape_cast_fp16_to_uint16_dtype_0, x = var_4608_shape_cast_fp16)[name = string("cast_251")]; uint16 gather_336_cast_uint16 = gather(axis = gather_336_axis_0, batch_dims = gather_336_batch_dims_0, indices = gather_336_indices_0_to_uint16, validate_indices = gather_336_validate_indices_0, x = var_4608_shape_cast_fp16_to_uint16)[name = string("gather_336_cast_uint16")]; string gather_336_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_336_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_337 = const()[name = string("gather_337"), val = int32(128)]; tensor var_4615_axes_0 = const()[name = string("op_4615_axes_0"), val = tensor([2])]; tensor var_4615_cast_fp16 = expand_dims(axes = var_4615_axes_0, x = hidden_states_1089_cast_fp16)[name = string("op_4615_cast_fp16")]; int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; int32 gather_336_cast_uint16_to_int32 = cast(dtype = gather_336_cast_uint16_to_int32_dtype_0, x = gather_336_cast_uint16)[name = string("cast_250")]; tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_334, gather_335, var_4490, gather_336_cast_uint16_to_int32, gather_337))[name = string("concat_191")]; tensor shape_47_cast_fp16 = shape(x = var_4615_cast_fp16)[name = string("shape_47_cast_fp16")]; int32 equal_47_y_0 = const()[name = string("equal_47_y_0"), val = int32(-1)]; tensor equal_47 = equal(x = concat_191, y = equal_47_y_0)[name = string("equal_47")]; tensor select_47 = select(a = shape_47_cast_fp16, b = concat_191, cond = equal_47)[name = string("select_47")]; tensor real_div_47 = real_div(x = select_47, y = shape_47_cast_fp16)[name = string("real_div_47")]; tensor hidden_states_1091_cast_fp16 = tile(reps = real_div_47, x = var_4615_cast_fp16)[name = string("hidden_states_1091_cast_fp16")]; tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, 16, -1, 128])]; tensor value_states_47_cast_fp16 = reshape(shape = concat_192x, x = hidden_states_1091_cast_fp16)[name = string("value_states_47_cast_fp16")]; bool var_4626_transpose_x_1 = const()[name = string("op_4626_transpose_x_1"), val = bool(false)]; bool var_4626_transpose_y_1 = const()[name = string("op_4626_transpose_y_1"), val = bool(true)]; tensor var_4626_cast_fp16 = matmul(transpose_x = var_4626_transpose_x_1, transpose_y = var_4626_transpose_y_1, x = query_47_cast_fp16, y = key_states_47_cast_fp16)[name = string("op_4626_cast_fp16")]; fp16 var_4627_to_fp16 = const()[name = string("op_4627_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_93_cast_fp16 = mul(x = var_4626_cast_fp16, y = var_4627_to_fp16)[name = string("attn_weights_93_cast_fp16")]; tensor input_279_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_279_cast_fp16")]; tensor var_4630_cast_fp16 = softmax(axis = var_4491, x = input_279_cast_fp16)[name = string("op_4630_cast_fp16")]; bool attn_output_93_transpose_x_0 = const()[name = string("attn_output_93_transpose_x_0"), val = bool(false)]; bool attn_output_93_transpose_y_0 = const()[name = string("attn_output_93_transpose_y_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = matmul(transpose_x = attn_output_93_transpose_x_0, transpose_y = attn_output_93_transpose_y_0, x = var_4630_cast_fp16, y = value_states_47_cast_fp16)[name = string("attn_output_93_cast_fp16")]; tensor var_4634_perm_0 = const()[name = string("op_4634_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_193_axis_0 = const()[name = string("concat_193_axis_0"), val = int32(0)]; bool concat_193_interleave_0 = const()[name = string("concat_193_interleave_0"), val = bool(false)]; int32 gather_325_cast_uint16_to_int32 = cast(dtype = gather_325_cast_uint16_to_int32_dtype_0, x = gather_325_cast_uint16)[name = string("cast_254")]; tensor concat_193 = concat(axis = concat_193_axis_0, interleave = concat_193_interleave_0, values = (gather_324, gather_325_cast_uint16_to_int32, var_4491))[name = string("concat_193")]; tensor var_4634_cast_fp16 = transpose(perm = var_4634_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_16")]; tensor var_4637_cast_fp16 = reshape(shape = concat_193, x = var_4634_cast_fp16)[name = string("op_4637_cast_fp16")]; tensor layers_23_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_23_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042662080)))]; tensor linear_164_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_o_proj_weight_to_fp16, x = var_4637_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor hidden_states_1095_cast_fp16 = add(x = hidden_states_1059_cast_fp16, y = linear_164_cast_fp16)[name = string("hidden_states_1095_cast_fp16")]; fp16 var_4490_promoted_3_to_fp16 = const()[name = string("op_4490_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4644_cast_fp16 = pow(x = hidden_states_1095_cast_fp16, y = var_4490_promoted_3_to_fp16)[name = string("op_4644_cast_fp16")]; tensor variance_191_axes_0 = const()[name = string("variance_191_axes_0"), val = tensor([-1])]; bool variance_191_keep_dims_0 = const()[name = string("variance_191_keep_dims_0"), val = bool(true)]; tensor variance_191_cast_fp16 = reduce_mean(axes = variance_191_axes_0, keep_dims = variance_191_keep_dims_0, x = var_4644_cast_fp16)[name = string("variance_191_cast_fp16")]; fp16 var_4647_to_fp16 = const()[name = string("op_4647_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4648_cast_fp16 = add(x = variance_191_cast_fp16, y = var_4647_to_fp16)[name = string("op_4648_cast_fp16")]; fp32 var_4649_epsilon_0 = const()[name = string("op_4649_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4649_cast_fp16 = rsqrt(epsilon = var_4649_epsilon_0, x = var_4648_cast_fp16)[name = string("op_4649_cast_fp16")]; tensor hidden_states_1099_cast_fp16 = mul(x = hidden_states_1095_cast_fp16, y = var_4649_cast_fp16)[name = string("hidden_states_1099_cast_fp16")]; tensor layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1046856448)))]; tensor input_285_cast_fp16 = mul(x = layers_23_post_attention_layernorm_weight_to_fp16, y = hidden_states_1099_cast_fp16)[name = string("input_285_cast_fp16")]; tensor layers_23_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_23_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1046858560)))]; tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_gate_proj_weight_to_fp16, x = input_285_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor var_4661_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_4661_cast_fp16")]; tensor layers_23_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_23_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1053150080)))]; tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_up_proj_weight_to_fp16, x = input_285_cast_fp16)[name = string("linear_166_cast_fp16")]; tensor input_289_cast_fp16 = mul(x = var_4661_cast_fp16, y = linear_166_cast_fp16)[name = string("input_289_cast_fp16")]; tensor layers_23_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_23_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059441600)))]; tensor linear_167_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_mlp_down_proj_weight_to_fp16, x = input_289_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor hidden_states_1105_cast_fp16 = add(x = hidden_states_1095_cast_fp16, y = linear_167_cast_fp16)[name = string("hidden_states_1105_cast_fp16")]; int32 var_4678 = const()[name = string("op_4678"), val = int32(2)]; int32 var_4679 = const()[name = string("op_4679"), val = int32(-1)]; fp16 var_4678_promoted_to_fp16 = const()[name = string("op_4678_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4688_cast_fp16 = pow(x = hidden_states_1105_cast_fp16, y = var_4678_promoted_to_fp16)[name = string("op_4688_cast_fp16")]; tensor variance_193_axes_0 = const()[name = string("variance_193_axes_0"), val = tensor([-1])]; bool variance_193_keep_dims_0 = const()[name = string("variance_193_keep_dims_0"), val = bool(true)]; tensor variance_193_cast_fp16 = reduce_mean(axes = variance_193_axes_0, keep_dims = variance_193_keep_dims_0, x = var_4688_cast_fp16)[name = string("variance_193_cast_fp16")]; fp16 var_4691_to_fp16 = const()[name = string("op_4691_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4692_cast_fp16 = add(x = variance_193_cast_fp16, y = var_4691_to_fp16)[name = string("op_4692_cast_fp16")]; fp32 var_4693_epsilon_0 = const()[name = string("op_4693_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4693_cast_fp16 = rsqrt(epsilon = var_4693_epsilon_0, x = var_4692_cast_fp16)[name = string("op_4693_cast_fp16")]; tensor hidden_states_1109_cast_fp16 = mul(x = hidden_states_1105_cast_fp16, y = var_4693_cast_fp16)[name = string("hidden_states_1109_cast_fp16")]; tensor layers_24_input_layernorm_weight_to_fp16 = const()[name = string("layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1065733120)))]; tensor hidden_states_1113_cast_fp16 = mul(x = layers_24_input_layernorm_weight_to_fp16, y = hidden_states_1109_cast_fp16)[name = string("hidden_states_1113_cast_fp16")]; tensor var_4706_shape_cast_fp16 = shape(x = hidden_states_1113_cast_fp16)[name = string("op_4706_shape_cast_fp16")]; int32 gather_338 = const()[name = string("gather_338"), val = int32(1)]; int32 gather_339_axis_0 = const()[name = string("gather_339_axis_0"), val = int32(0)]; int32 gather_339_batch_dims_0 = const()[name = string("gather_339_batch_dims_0"), val = int32(0)]; bool gather_339_validate_indices_0 = const()[name = string("gather_339_validate_indices_0"), val = bool(false)]; string var_4706_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4706_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_339_indices_0_to_uint16 = const()[name = string("gather_339_indices_0_to_uint16"), val = uint16(1)]; tensor var_4706_shape_cast_fp16_to_uint16 = cast(dtype = var_4706_shape_cast_fp16_to_uint16_dtype_0, x = var_4706_shape_cast_fp16)[name = string("cast_249")]; uint16 gather_339_cast_uint16 = gather(axis = gather_339_axis_0, batch_dims = gather_339_batch_dims_0, indices = gather_339_indices_0_to_uint16, validate_indices = gather_339_validate_indices_0, x = var_4706_shape_cast_fp16_to_uint16)[name = string("gather_339_cast_uint16")]; string gather_339_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_339_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_24_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_24_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1065735232)))]; tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_24_self_attn_q_proj_weight_to_fp16, x = hidden_states_1113_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor concat_194x = const()[name = string("concat_194x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1115_cast_fp16 = reshape(shape = concat_194x, x = linear_168_cast_fp16)[name = string("hidden_states_1115_cast_fp16")]; fp16 var_4678_promoted_1_to_fp16 = const()[name = string("op_4678_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_4714_cast_fp16 = pow(x = hidden_states_1115_cast_fp16, y = var_4678_promoted_1_to_fp16)[name = string("op_4714_cast_fp16")]; tensor variance_195_axes_0 = const()[name = string("variance_195_axes_0"), val = tensor([-1])]; bool variance_195_keep_dims_0 = const()[name = string("variance_195_keep_dims_0"), val = bool(true)]; tensor variance_195_cast_fp16 = reduce_mean(axes = variance_195_axes_0, keep_dims = variance_195_keep_dims_0, x = var_4714_cast_fp16)[name = string("variance_195_cast_fp16")]; fp16 var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4718_cast_fp16 = add(x = variance_195_cast_fp16, y = var_4717_to_fp16)[name = string("op_4718_cast_fp16")]; fp32 var_4719_epsilon_0 = const()[name = string("op_4719_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4719_cast_fp16 = rsqrt(epsilon = var_4719_epsilon_0, x = var_4718_cast_fp16)[name = string("op_4719_cast_fp16")]; tensor hidden_states_1119_cast_fp16 = mul(x = hidden_states_1115_cast_fp16, y = var_4719_cast_fp16)[name = string("hidden_states_1119_cast_fp16")]; tensor layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069929600)))]; tensor var_4722_cast_fp16 = mul(x = layers_24_self_attn_q_norm_weight_to_fp16, y = hidden_states_1119_cast_fp16)[name = string("op_4722_cast_fp16")]; tensor q_49_perm_0 = const()[name = string("q_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_24_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_24_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069929920)))]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_k_proj_weight_to_fp16, x = hidden_states_1113_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor concat_195x = const()[name = string("concat_195x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1123_cast_fp16 = reshape(shape = concat_195x, x = linear_169_cast_fp16)[name = string("hidden_states_1123_cast_fp16")]; fp16 var_4678_promoted_2_to_fp16 = const()[name = string("op_4678_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_4730_cast_fp16 = pow(x = hidden_states_1123_cast_fp16, y = var_4678_promoted_2_to_fp16)[name = string("op_4730_cast_fp16")]; tensor variance_197_axes_0 = const()[name = string("variance_197_axes_0"), val = tensor([-1])]; bool variance_197_keep_dims_0 = const()[name = string("variance_197_keep_dims_0"), val = bool(true)]; tensor variance_197_cast_fp16 = reduce_mean(axes = variance_197_axes_0, keep_dims = variance_197_keep_dims_0, x = var_4730_cast_fp16)[name = string("variance_197_cast_fp16")]; fp16 var_4733_to_fp16 = const()[name = string("op_4733_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4734_cast_fp16 = add(x = variance_197_cast_fp16, y = var_4733_to_fp16)[name = string("op_4734_cast_fp16")]; fp32 var_4735_epsilon_0 = const()[name = string("op_4735_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4735_cast_fp16 = rsqrt(epsilon = var_4735_epsilon_0, x = var_4734_cast_fp16)[name = string("op_4735_cast_fp16")]; tensor hidden_states_1127_cast_fp16 = mul(x = hidden_states_1123_cast_fp16, y = var_4735_cast_fp16)[name = string("hidden_states_1127_cast_fp16")]; tensor layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072027136)))]; tensor var_4738_cast_fp16 = mul(x = layers_24_self_attn_k_norm_weight_to_fp16, y = hidden_states_1127_cast_fp16)[name = string("op_4738_cast_fp16")]; tensor k_49_perm_0 = const()[name = string("k_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_24_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_24_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072027456)))]; tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_v_proj_weight_to_fp16, x = hidden_states_1113_cast_fp16)[name = string("linear_170_cast_fp16")]; tensor concat_196x = const()[name = string("concat_196x"), val = tensor([1, -1, 8, 128])]; tensor var_4743_cast_fp16 = reshape(shape = concat_196x, x = linear_170_cast_fp16)[name = string("op_4743_cast_fp16")]; tensor hidden_states_1135_perm_0 = const()[name = string("hidden_states_1135_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_49_cast_fp16 = transpose(perm = q_49_perm_0, x = var_4722_cast_fp16)[name = string("transpose_15")]; tensor var_4747_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4747_cast_fp16")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4758_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_4758_cast_fp16")]; bool var_4760_interleave_0 = const()[name = string("op_4760_interleave_0"), val = bool(false)]; tensor var_4760_cast_fp16 = concat(axis = var_4679, interleave = var_4760_interleave_0, values = (var_4758_cast_fp16, x1_97_cast_fp16))[name = string("op_4760_cast_fp16")]; tensor var_4761_cast_fp16 = mul(x = var_4760_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4761_cast_fp16")]; tensor query_49_cast_fp16 = add(x = var_4747_cast_fp16, y = var_4761_cast_fp16)[name = string("query_49_cast_fp16")]; tensor k_49_cast_fp16 = transpose(perm = k_49_perm_0, x = var_4738_cast_fp16)[name = string("transpose_14")]; tensor var_4763_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4763_cast_fp16")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4774_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_4774_cast_fp16")]; bool var_4776_interleave_0 = const()[name = string("op_4776_interleave_0"), val = bool(false)]; tensor var_4776_cast_fp16 = concat(axis = var_4679, interleave = var_4776_interleave_0, values = (var_4774_cast_fp16, x1_99_cast_fp16))[name = string("op_4776_cast_fp16")]; tensor var_4777_cast_fp16 = mul(x = var_4776_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4777_cast_fp16")]; tensor hidden_states_1131_cast_fp16 = add(x = var_4763_cast_fp16, y = var_4777_cast_fp16)[name = string("hidden_states_1131_cast_fp16")]; tensor var_4779_shape_cast_fp16 = shape(x = hidden_states_1131_cast_fp16)[name = string("op_4779_shape_cast_fp16")]; int32 gather_344 = const()[name = string("gather_344"), val = int32(1)]; int32 gather_345 = const()[name = string("gather_345"), val = int32(8)]; int32 gather_346_axis_0 = const()[name = string("gather_346_axis_0"), val = int32(0)]; int32 gather_346_batch_dims_0 = const()[name = string("gather_346_batch_dims_0"), val = int32(0)]; bool gather_346_validate_indices_0 = const()[name = string("gather_346_validate_indices_0"), val = bool(false)]; string var_4779_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4779_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_346_indices_0_to_uint16 = const()[name = string("gather_346_indices_0_to_uint16"), val = uint16(2)]; tensor var_4779_shape_cast_fp16_to_uint16 = cast(dtype = var_4779_shape_cast_fp16_to_uint16_dtype_0, x = var_4779_shape_cast_fp16)[name = string("cast_247")]; uint16 gather_346_cast_uint16 = gather(axis = gather_346_axis_0, batch_dims = gather_346_batch_dims_0, indices = gather_346_indices_0_to_uint16, validate_indices = gather_346_validate_indices_0, x = var_4779_shape_cast_fp16_to_uint16)[name = string("gather_346_cast_uint16")]; string gather_346_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_346_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_347 = const()[name = string("gather_347"), val = int32(128)]; tensor var_4786_axes_0 = const()[name = string("op_4786_axes_0"), val = tensor([2])]; tensor var_4786_cast_fp16 = expand_dims(axes = var_4786_axes_0, x = hidden_states_1131_cast_fp16)[name = string("op_4786_cast_fp16")]; int32 concat_197_axis_0 = const()[name = string("concat_197_axis_0"), val = int32(0)]; bool concat_197_interleave_0 = const()[name = string("concat_197_interleave_0"), val = bool(false)]; int32 gather_346_cast_uint16_to_int32 = cast(dtype = gather_346_cast_uint16_to_int32_dtype_0, x = gather_346_cast_uint16)[name = string("cast_246")]; tensor concat_197 = concat(axis = concat_197_axis_0, interleave = concat_197_interleave_0, values = (gather_344, gather_345, var_4678, gather_346_cast_uint16_to_int32, gather_347))[name = string("concat_197")]; tensor shape_48_cast_fp16 = shape(x = var_4786_cast_fp16)[name = string("shape_48_cast_fp16")]; int32 equal_48_y_0 = const()[name = string("equal_48_y_0"), val = int32(-1)]; tensor equal_48 = equal(x = concat_197, y = equal_48_y_0)[name = string("equal_48")]; tensor select_48 = select(a = shape_48_cast_fp16, b = concat_197, cond = equal_48)[name = string("select_48")]; tensor real_div_48 = real_div(x = select_48, y = shape_48_cast_fp16)[name = string("real_div_48")]; tensor hidden_states_1133_cast_fp16 = tile(reps = real_div_48, x = var_4786_cast_fp16)[name = string("hidden_states_1133_cast_fp16")]; tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, 16, -1, 128])]; tensor key_states_49_cast_fp16 = reshape(shape = concat_198x, x = hidden_states_1133_cast_fp16)[name = string("key_states_49_cast_fp16")]; tensor hidden_states_1135_cast_fp16 = transpose(perm = hidden_states_1135_perm_0, x = var_4743_cast_fp16)[name = string("transpose_13")]; tensor var_4796_shape_cast_fp16 = shape(x = hidden_states_1135_cast_fp16)[name = string("op_4796_shape_cast_fp16")]; int32 gather_348 = const()[name = string("gather_348"), val = int32(1)]; int32 gather_349 = const()[name = string("gather_349"), val = int32(8)]; int32 gather_350_axis_0 = const()[name = string("gather_350_axis_0"), val = int32(0)]; int32 gather_350_batch_dims_0 = const()[name = string("gather_350_batch_dims_0"), val = int32(0)]; bool gather_350_validate_indices_0 = const()[name = string("gather_350_validate_indices_0"), val = bool(false)]; string var_4796_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4796_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_350_indices_0_to_uint16 = const()[name = string("gather_350_indices_0_to_uint16"), val = uint16(2)]; tensor var_4796_shape_cast_fp16_to_uint16 = cast(dtype = var_4796_shape_cast_fp16_to_uint16_dtype_0, x = var_4796_shape_cast_fp16)[name = string("cast_245")]; uint16 gather_350_cast_uint16 = gather(axis = gather_350_axis_0, batch_dims = gather_350_batch_dims_0, indices = gather_350_indices_0_to_uint16, validate_indices = gather_350_validate_indices_0, x = var_4796_shape_cast_fp16_to_uint16)[name = string("gather_350_cast_uint16")]; string gather_350_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_350_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_351 = const()[name = string("gather_351"), val = int32(128)]; tensor var_4803_axes_0 = const()[name = string("op_4803_axes_0"), val = tensor([2])]; tensor var_4803_cast_fp16 = expand_dims(axes = var_4803_axes_0, x = hidden_states_1135_cast_fp16)[name = string("op_4803_cast_fp16")]; int32 concat_199_axis_0 = const()[name = string("concat_199_axis_0"), val = int32(0)]; bool concat_199_interleave_0 = const()[name = string("concat_199_interleave_0"), val = bool(false)]; int32 gather_350_cast_uint16_to_int32 = cast(dtype = gather_350_cast_uint16_to_int32_dtype_0, x = gather_350_cast_uint16)[name = string("cast_244")]; tensor concat_199 = concat(axis = concat_199_axis_0, interleave = concat_199_interleave_0, values = (gather_348, gather_349, var_4678, gather_350_cast_uint16_to_int32, gather_351))[name = string("concat_199")]; tensor shape_49_cast_fp16 = shape(x = var_4803_cast_fp16)[name = string("shape_49_cast_fp16")]; int32 equal_49_y_0 = const()[name = string("equal_49_y_0"), val = int32(-1)]; tensor equal_49 = equal(x = concat_199, y = equal_49_y_0)[name = string("equal_49")]; tensor select_49 = select(a = shape_49_cast_fp16, b = concat_199, cond = equal_49)[name = string("select_49")]; tensor real_div_49 = real_div(x = select_49, y = shape_49_cast_fp16)[name = string("real_div_49")]; tensor hidden_states_1137_cast_fp16 = tile(reps = real_div_49, x = var_4803_cast_fp16)[name = string("hidden_states_1137_cast_fp16")]; tensor concat_200x = const()[name = string("concat_200x"), val = tensor([1, 16, -1, 128])]; tensor value_states_49_cast_fp16 = reshape(shape = concat_200x, x = hidden_states_1137_cast_fp16)[name = string("value_states_49_cast_fp16")]; bool var_4814_transpose_x_1 = const()[name = string("op_4814_transpose_x_1"), val = bool(false)]; bool var_4814_transpose_y_1 = const()[name = string("op_4814_transpose_y_1"), val = bool(true)]; tensor var_4814_cast_fp16 = matmul(transpose_x = var_4814_transpose_x_1, transpose_y = var_4814_transpose_y_1, x = query_49_cast_fp16, y = key_states_49_cast_fp16)[name = string("op_4814_cast_fp16")]; fp16 var_4815_to_fp16 = const()[name = string("op_4815_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_4814_cast_fp16, y = var_4815_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor input_291_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_291_cast_fp16")]; tensor var_4818_cast_fp16 = softmax(axis = var_4679, x = input_291_cast_fp16)[name = string("op_4818_cast_fp16")]; bool attn_output_97_transpose_x_0 = const()[name = string("attn_output_97_transpose_x_0"), val = bool(false)]; bool attn_output_97_transpose_y_0 = const()[name = string("attn_output_97_transpose_y_0"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_0, transpose_y = attn_output_97_transpose_y_0, x = var_4818_cast_fp16, y = value_states_49_cast_fp16)[name = string("attn_output_97_cast_fp16")]; tensor var_4822_perm_0 = const()[name = string("op_4822_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_201_axis_0 = const()[name = string("concat_201_axis_0"), val = int32(0)]; bool concat_201_interleave_0 = const()[name = string("concat_201_interleave_0"), val = bool(false)]; int32 gather_339_cast_uint16_to_int32 = cast(dtype = gather_339_cast_uint16_to_int32_dtype_0, x = gather_339_cast_uint16)[name = string("cast_248")]; tensor concat_201 = concat(axis = concat_201_axis_0, interleave = concat_201_interleave_0, values = (gather_338, gather_339_cast_uint16_to_int32, var_4679))[name = string("concat_201")]; tensor var_4822_cast_fp16 = transpose(perm = var_4822_perm_0, x = attn_output_97_cast_fp16)[name = string("transpose_12")]; tensor var_4825_cast_fp16 = reshape(shape = concat_201, x = var_4822_cast_fp16)[name = string("op_4825_cast_fp16")]; tensor layers_24_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_24_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074124672)))]; tensor linear_171_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_o_proj_weight_to_fp16, x = var_4825_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor hidden_states_1141_cast_fp16 = add(x = hidden_states_1105_cast_fp16, y = linear_171_cast_fp16)[name = string("hidden_states_1141_cast_fp16")]; fp16 var_4678_promoted_3_to_fp16 = const()[name = string("op_4678_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4832_cast_fp16 = pow(x = hidden_states_1141_cast_fp16, y = var_4678_promoted_3_to_fp16)[name = string("op_4832_cast_fp16")]; tensor variance_199_axes_0 = const()[name = string("variance_199_axes_0"), val = tensor([-1])]; bool variance_199_keep_dims_0 = const()[name = string("variance_199_keep_dims_0"), val = bool(true)]; tensor variance_199_cast_fp16 = reduce_mean(axes = variance_199_axes_0, keep_dims = variance_199_keep_dims_0, x = var_4832_cast_fp16)[name = string("variance_199_cast_fp16")]; fp16 var_4835_to_fp16 = const()[name = string("op_4835_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4836_cast_fp16 = add(x = variance_199_cast_fp16, y = var_4835_to_fp16)[name = string("op_4836_cast_fp16")]; fp32 var_4837_epsilon_0 = const()[name = string("op_4837_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4837_cast_fp16 = rsqrt(epsilon = var_4837_epsilon_0, x = var_4836_cast_fp16)[name = string("op_4837_cast_fp16")]; tensor hidden_states_1145_cast_fp16 = mul(x = hidden_states_1141_cast_fp16, y = var_4837_cast_fp16)[name = string("hidden_states_1145_cast_fp16")]; tensor layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1078319040)))]; tensor input_297_cast_fp16 = mul(x = layers_24_post_attention_layernorm_weight_to_fp16, y = hidden_states_1145_cast_fp16)[name = string("input_297_cast_fp16")]; tensor layers_24_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_24_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1078321152)))]; tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_gate_proj_weight_to_fp16, x = input_297_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor var_4849_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_4849_cast_fp16")]; tensor layers_24_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_24_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1084612672)))]; tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_up_proj_weight_to_fp16, x = input_297_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor input_301_cast_fp16 = mul(x = var_4849_cast_fp16, y = linear_173_cast_fp16)[name = string("input_301_cast_fp16")]; tensor layers_24_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_24_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1090904192)))]; tensor linear_174_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_mlp_down_proj_weight_to_fp16, x = input_301_cast_fp16)[name = string("linear_174_cast_fp16")]; tensor hidden_states_1151_cast_fp16 = add(x = hidden_states_1141_cast_fp16, y = linear_174_cast_fp16)[name = string("hidden_states_1151_cast_fp16")]; int32 var_4866 = const()[name = string("op_4866"), val = int32(2)]; int32 var_4867 = const()[name = string("op_4867"), val = int32(-1)]; fp16 var_4866_promoted_to_fp16 = const()[name = string("op_4866_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4876_cast_fp16 = pow(x = hidden_states_1151_cast_fp16, y = var_4866_promoted_to_fp16)[name = string("op_4876_cast_fp16")]; tensor variance_201_axes_0 = const()[name = string("variance_201_axes_0"), val = tensor([-1])]; bool variance_201_keep_dims_0 = const()[name = string("variance_201_keep_dims_0"), val = bool(true)]; tensor variance_201_cast_fp16 = reduce_mean(axes = variance_201_axes_0, keep_dims = variance_201_keep_dims_0, x = var_4876_cast_fp16)[name = string("variance_201_cast_fp16")]; fp16 var_4879_to_fp16 = const()[name = string("op_4879_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4880_cast_fp16 = add(x = variance_201_cast_fp16, y = var_4879_to_fp16)[name = string("op_4880_cast_fp16")]; fp32 var_4881_epsilon_0 = const()[name = string("op_4881_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4881_cast_fp16 = rsqrt(epsilon = var_4881_epsilon_0, x = var_4880_cast_fp16)[name = string("op_4881_cast_fp16")]; tensor hidden_states_1155_cast_fp16 = mul(x = hidden_states_1151_cast_fp16, y = var_4881_cast_fp16)[name = string("hidden_states_1155_cast_fp16")]; tensor layers_25_input_layernorm_weight_to_fp16 = const()[name = string("layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097195712)))]; tensor hidden_states_1159_cast_fp16 = mul(x = layers_25_input_layernorm_weight_to_fp16, y = hidden_states_1155_cast_fp16)[name = string("hidden_states_1159_cast_fp16")]; tensor var_4894_shape_cast_fp16 = shape(x = hidden_states_1159_cast_fp16)[name = string("op_4894_shape_cast_fp16")]; int32 gather_352 = const()[name = string("gather_352"), val = int32(1)]; int32 gather_353_axis_0 = const()[name = string("gather_353_axis_0"), val = int32(0)]; int32 gather_353_batch_dims_0 = const()[name = string("gather_353_batch_dims_0"), val = int32(0)]; bool gather_353_validate_indices_0 = const()[name = string("gather_353_validate_indices_0"), val = bool(false)]; string var_4894_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4894_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_353_indices_0_to_uint16 = const()[name = string("gather_353_indices_0_to_uint16"), val = uint16(1)]; tensor var_4894_shape_cast_fp16_to_uint16 = cast(dtype = var_4894_shape_cast_fp16_to_uint16_dtype_0, x = var_4894_shape_cast_fp16)[name = string("cast_243")]; uint16 gather_353_cast_uint16 = gather(axis = gather_353_axis_0, batch_dims = gather_353_batch_dims_0, indices = gather_353_indices_0_to_uint16, validate_indices = gather_353_validate_indices_0, x = var_4894_shape_cast_fp16_to_uint16)[name = string("gather_353_cast_uint16")]; string gather_353_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_353_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_25_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_25_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097197824)))]; tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_25_self_attn_q_proj_weight_to_fp16, x = hidden_states_1159_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor concat_202x = const()[name = string("concat_202x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1161_cast_fp16 = reshape(shape = concat_202x, x = linear_175_cast_fp16)[name = string("hidden_states_1161_cast_fp16")]; fp16 var_4866_promoted_1_to_fp16 = const()[name = string("op_4866_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_4902_cast_fp16 = pow(x = hidden_states_1161_cast_fp16, y = var_4866_promoted_1_to_fp16)[name = string("op_4902_cast_fp16")]; tensor variance_203_axes_0 = const()[name = string("variance_203_axes_0"), val = tensor([-1])]; bool variance_203_keep_dims_0 = const()[name = string("variance_203_keep_dims_0"), val = bool(true)]; tensor variance_203_cast_fp16 = reduce_mean(axes = variance_203_axes_0, keep_dims = variance_203_keep_dims_0, x = var_4902_cast_fp16)[name = string("variance_203_cast_fp16")]; fp16 var_4905_to_fp16 = const()[name = string("op_4905_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4906_cast_fp16 = add(x = variance_203_cast_fp16, y = var_4905_to_fp16)[name = string("op_4906_cast_fp16")]; fp32 var_4907_epsilon_0 = const()[name = string("op_4907_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4907_cast_fp16 = rsqrt(epsilon = var_4907_epsilon_0, x = var_4906_cast_fp16)[name = string("op_4907_cast_fp16")]; tensor hidden_states_1165_cast_fp16 = mul(x = hidden_states_1161_cast_fp16, y = var_4907_cast_fp16)[name = string("hidden_states_1165_cast_fp16")]; tensor layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1101392192)))]; tensor var_4910_cast_fp16 = mul(x = layers_25_self_attn_q_norm_weight_to_fp16, y = hidden_states_1165_cast_fp16)[name = string("op_4910_cast_fp16")]; tensor q_51_perm_0 = const()[name = string("q_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_25_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_25_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1101392512)))]; tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_k_proj_weight_to_fp16, x = hidden_states_1159_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor concat_203x = const()[name = string("concat_203x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1169_cast_fp16 = reshape(shape = concat_203x, x = linear_176_cast_fp16)[name = string("hidden_states_1169_cast_fp16")]; fp16 var_4866_promoted_2_to_fp16 = const()[name = string("op_4866_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_4918_cast_fp16 = pow(x = hidden_states_1169_cast_fp16, y = var_4866_promoted_2_to_fp16)[name = string("op_4918_cast_fp16")]; tensor variance_205_axes_0 = const()[name = string("variance_205_axes_0"), val = tensor([-1])]; bool variance_205_keep_dims_0 = const()[name = string("variance_205_keep_dims_0"), val = bool(true)]; tensor variance_205_cast_fp16 = reduce_mean(axes = variance_205_axes_0, keep_dims = variance_205_keep_dims_0, x = var_4918_cast_fp16)[name = string("variance_205_cast_fp16")]; fp16 var_4921_to_fp16 = const()[name = string("op_4921_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4922_cast_fp16 = add(x = variance_205_cast_fp16, y = var_4921_to_fp16)[name = string("op_4922_cast_fp16")]; fp32 var_4923_epsilon_0 = const()[name = string("op_4923_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4923_cast_fp16 = rsqrt(epsilon = var_4923_epsilon_0, x = var_4922_cast_fp16)[name = string("op_4923_cast_fp16")]; tensor hidden_states_1173_cast_fp16 = mul(x = hidden_states_1169_cast_fp16, y = var_4923_cast_fp16)[name = string("hidden_states_1173_cast_fp16")]; tensor layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1103489728)))]; tensor var_4926_cast_fp16 = mul(x = layers_25_self_attn_k_norm_weight_to_fp16, y = hidden_states_1173_cast_fp16)[name = string("op_4926_cast_fp16")]; tensor k_51_perm_0 = const()[name = string("k_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_25_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_25_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1103490048)))]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_v_proj_weight_to_fp16, x = hidden_states_1159_cast_fp16)[name = string("linear_177_cast_fp16")]; tensor concat_204x = const()[name = string("concat_204x"), val = tensor([1, -1, 8, 128])]; tensor var_4931_cast_fp16 = reshape(shape = concat_204x, x = linear_177_cast_fp16)[name = string("op_4931_cast_fp16")]; tensor hidden_states_1181_perm_0 = const()[name = string("hidden_states_1181_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_51_cast_fp16 = transpose(perm = q_51_perm_0, x = var_4910_cast_fp16)[name = string("transpose_11")]; tensor var_4935_cast_fp16 = mul(x = q_51_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4935_cast_fp16")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51_cast_fp16)[name = string("x1_101_cast_fp16")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51_cast_fp16)[name = string("x2_101_cast_fp16")]; fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4946_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_4946_cast_fp16")]; bool var_4948_interleave_0 = const()[name = string("op_4948_interleave_0"), val = bool(false)]; tensor var_4948_cast_fp16 = concat(axis = var_4867, interleave = var_4948_interleave_0, values = (var_4946_cast_fp16, x1_101_cast_fp16))[name = string("op_4948_cast_fp16")]; tensor var_4949_cast_fp16 = mul(x = var_4948_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4949_cast_fp16")]; tensor query_51_cast_fp16 = add(x = var_4935_cast_fp16, y = var_4949_cast_fp16)[name = string("query_51_cast_fp16")]; tensor k_51_cast_fp16 = transpose(perm = k_51_perm_0, x = var_4926_cast_fp16)[name = string("transpose_10")]; tensor var_4951_cast_fp16 = mul(x = k_51_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4951_cast_fp16")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51_cast_fp16)[name = string("x1_103_cast_fp16")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51_cast_fp16)[name = string("x2_103_cast_fp16")]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4962_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_4962_cast_fp16")]; bool var_4964_interleave_0 = const()[name = string("op_4964_interleave_0"), val = bool(false)]; tensor var_4964_cast_fp16 = concat(axis = var_4867, interleave = var_4964_interleave_0, values = (var_4962_cast_fp16, x1_103_cast_fp16))[name = string("op_4964_cast_fp16")]; tensor var_4965_cast_fp16 = mul(x = var_4964_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4965_cast_fp16")]; tensor hidden_states_1177_cast_fp16 = add(x = var_4951_cast_fp16, y = var_4965_cast_fp16)[name = string("hidden_states_1177_cast_fp16")]; tensor var_4967_shape_cast_fp16 = shape(x = hidden_states_1177_cast_fp16)[name = string("op_4967_shape_cast_fp16")]; int32 gather_358 = const()[name = string("gather_358"), val = int32(1)]; int32 gather_359 = const()[name = string("gather_359"), val = int32(8)]; int32 gather_360_axis_0 = const()[name = string("gather_360_axis_0"), val = int32(0)]; int32 gather_360_batch_dims_0 = const()[name = string("gather_360_batch_dims_0"), val = int32(0)]; bool gather_360_validate_indices_0 = const()[name = string("gather_360_validate_indices_0"), val = bool(false)]; string var_4967_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4967_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_360_indices_0_to_uint16 = const()[name = string("gather_360_indices_0_to_uint16"), val = uint16(2)]; tensor var_4967_shape_cast_fp16_to_uint16 = cast(dtype = var_4967_shape_cast_fp16_to_uint16_dtype_0, x = var_4967_shape_cast_fp16)[name = string("cast_241")]; uint16 gather_360_cast_uint16 = gather(axis = gather_360_axis_0, batch_dims = gather_360_batch_dims_0, indices = gather_360_indices_0_to_uint16, validate_indices = gather_360_validate_indices_0, x = var_4967_shape_cast_fp16_to_uint16)[name = string("gather_360_cast_uint16")]; string gather_360_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_360_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_361 = const()[name = string("gather_361"), val = int32(128)]; tensor var_4974_axes_0 = const()[name = string("op_4974_axes_0"), val = tensor([2])]; tensor var_4974_cast_fp16 = expand_dims(axes = var_4974_axes_0, x = hidden_states_1177_cast_fp16)[name = string("op_4974_cast_fp16")]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; int32 gather_360_cast_uint16_to_int32 = cast(dtype = gather_360_cast_uint16_to_int32_dtype_0, x = gather_360_cast_uint16)[name = string("cast_240")]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_358, gather_359, var_4866, gather_360_cast_uint16_to_int32, gather_361))[name = string("concat_205")]; tensor shape_50_cast_fp16 = shape(x = var_4974_cast_fp16)[name = string("shape_50_cast_fp16")]; int32 equal_50_y_0 = const()[name = string("equal_50_y_0"), val = int32(-1)]; tensor equal_50 = equal(x = concat_205, y = equal_50_y_0)[name = string("equal_50")]; tensor select_50 = select(a = shape_50_cast_fp16, b = concat_205, cond = equal_50)[name = string("select_50")]; tensor real_div_50 = real_div(x = select_50, y = shape_50_cast_fp16)[name = string("real_div_50")]; tensor hidden_states_1179_cast_fp16 = tile(reps = real_div_50, x = var_4974_cast_fp16)[name = string("hidden_states_1179_cast_fp16")]; tensor concat_206x = const()[name = string("concat_206x"), val = tensor([1, 16, -1, 128])]; tensor key_states_51_cast_fp16 = reshape(shape = concat_206x, x = hidden_states_1179_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor hidden_states_1181_cast_fp16 = transpose(perm = hidden_states_1181_perm_0, x = var_4931_cast_fp16)[name = string("transpose_9")]; tensor var_4984_shape_cast_fp16 = shape(x = hidden_states_1181_cast_fp16)[name = string("op_4984_shape_cast_fp16")]; int32 gather_362 = const()[name = string("gather_362"), val = int32(1)]; int32 gather_363 = const()[name = string("gather_363"), val = int32(8)]; int32 gather_364_axis_0 = const()[name = string("gather_364_axis_0"), val = int32(0)]; int32 gather_364_batch_dims_0 = const()[name = string("gather_364_batch_dims_0"), val = int32(0)]; bool gather_364_validate_indices_0 = const()[name = string("gather_364_validate_indices_0"), val = bool(false)]; string var_4984_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4984_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_364_indices_0_to_uint16 = const()[name = string("gather_364_indices_0_to_uint16"), val = uint16(2)]; tensor var_4984_shape_cast_fp16_to_uint16 = cast(dtype = var_4984_shape_cast_fp16_to_uint16_dtype_0, x = var_4984_shape_cast_fp16)[name = string("cast_239")]; uint16 gather_364_cast_uint16 = gather(axis = gather_364_axis_0, batch_dims = gather_364_batch_dims_0, indices = gather_364_indices_0_to_uint16, validate_indices = gather_364_validate_indices_0, x = var_4984_shape_cast_fp16_to_uint16)[name = string("gather_364_cast_uint16")]; string gather_364_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_364_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_365 = const()[name = string("gather_365"), val = int32(128)]; tensor var_4991_axes_0 = const()[name = string("op_4991_axes_0"), val = tensor([2])]; tensor var_4991_cast_fp16 = expand_dims(axes = var_4991_axes_0, x = hidden_states_1181_cast_fp16)[name = string("op_4991_cast_fp16")]; int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; int32 gather_364_cast_uint16_to_int32 = cast(dtype = gather_364_cast_uint16_to_int32_dtype_0, x = gather_364_cast_uint16)[name = string("cast_238")]; tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (gather_362, gather_363, var_4866, gather_364_cast_uint16_to_int32, gather_365))[name = string("concat_207")]; tensor shape_51_cast_fp16 = shape(x = var_4991_cast_fp16)[name = string("shape_51_cast_fp16")]; int32 equal_51_y_0 = const()[name = string("equal_51_y_0"), val = int32(-1)]; tensor equal_51 = equal(x = concat_207, y = equal_51_y_0)[name = string("equal_51")]; tensor select_51 = select(a = shape_51_cast_fp16, b = concat_207, cond = equal_51)[name = string("select_51")]; tensor real_div_51 = real_div(x = select_51, y = shape_51_cast_fp16)[name = string("real_div_51")]; tensor hidden_states_1183_cast_fp16 = tile(reps = real_div_51, x = var_4991_cast_fp16)[name = string("hidden_states_1183_cast_fp16")]; tensor concat_208x = const()[name = string("concat_208x"), val = tensor([1, 16, -1, 128])]; tensor value_states_51_cast_fp16 = reshape(shape = concat_208x, x = hidden_states_1183_cast_fp16)[name = string("value_states_51_cast_fp16")]; bool var_5002_transpose_x_1 = const()[name = string("op_5002_transpose_x_1"), val = bool(false)]; bool var_5002_transpose_y_1 = const()[name = string("op_5002_transpose_y_1"), val = bool(true)]; tensor var_5002_cast_fp16 = matmul(transpose_x = var_5002_transpose_x_1, transpose_y = var_5002_transpose_y_1, x = query_51_cast_fp16, y = key_states_51_cast_fp16)[name = string("op_5002_cast_fp16")]; fp16 var_5003_to_fp16 = const()[name = string("op_5003_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_101_cast_fp16 = mul(x = var_5002_cast_fp16, y = var_5003_to_fp16)[name = string("attn_weights_101_cast_fp16")]; tensor input_303_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_303_cast_fp16")]; tensor var_5006_cast_fp16 = softmax(axis = var_4867, x = input_303_cast_fp16)[name = string("op_5006_cast_fp16")]; bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = var_5006_cast_fp16, y = value_states_51_cast_fp16)[name = string("attn_output_101_cast_fp16")]; tensor var_5010_perm_0 = const()[name = string("op_5010_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_209_axis_0 = const()[name = string("concat_209_axis_0"), val = int32(0)]; bool concat_209_interleave_0 = const()[name = string("concat_209_interleave_0"), val = bool(false)]; int32 gather_353_cast_uint16_to_int32 = cast(dtype = gather_353_cast_uint16_to_int32_dtype_0, x = gather_353_cast_uint16)[name = string("cast_242")]; tensor concat_209 = concat(axis = concat_209_axis_0, interleave = concat_209_interleave_0, values = (gather_352, gather_353_cast_uint16_to_int32, var_4867))[name = string("concat_209")]; tensor var_5010_cast_fp16 = transpose(perm = var_5010_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_8")]; tensor var_5013_cast_fp16 = reshape(shape = concat_209, x = var_5010_cast_fp16)[name = string("op_5013_cast_fp16")]; tensor layers_25_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_25_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105587264)))]; tensor linear_178_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_o_proj_weight_to_fp16, x = var_5013_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor hidden_states_1187_cast_fp16 = add(x = hidden_states_1151_cast_fp16, y = linear_178_cast_fp16)[name = string("hidden_states_1187_cast_fp16")]; fp16 var_4866_promoted_3_to_fp16 = const()[name = string("op_4866_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_5020_cast_fp16 = pow(x = hidden_states_1187_cast_fp16, y = var_4866_promoted_3_to_fp16)[name = string("op_5020_cast_fp16")]; tensor variance_207_axes_0 = const()[name = string("variance_207_axes_0"), val = tensor([-1])]; bool variance_207_keep_dims_0 = const()[name = string("variance_207_keep_dims_0"), val = bool(true)]; tensor variance_207_cast_fp16 = reduce_mean(axes = variance_207_axes_0, keep_dims = variance_207_keep_dims_0, x = var_5020_cast_fp16)[name = string("variance_207_cast_fp16")]; fp16 var_5023_to_fp16 = const()[name = string("op_5023_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5024_cast_fp16 = add(x = variance_207_cast_fp16, y = var_5023_to_fp16)[name = string("op_5024_cast_fp16")]; fp32 var_5025_epsilon_0 = const()[name = string("op_5025_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5025_cast_fp16 = rsqrt(epsilon = var_5025_epsilon_0, x = var_5024_cast_fp16)[name = string("op_5025_cast_fp16")]; tensor hidden_states_1191_cast_fp16 = mul(x = hidden_states_1187_cast_fp16, y = var_5025_cast_fp16)[name = string("hidden_states_1191_cast_fp16")]; tensor layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1109781632)))]; tensor input_309_cast_fp16 = mul(x = layers_25_post_attention_layernorm_weight_to_fp16, y = hidden_states_1191_cast_fp16)[name = string("input_309_cast_fp16")]; tensor layers_25_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_25_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1109783744)))]; tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_gate_proj_weight_to_fp16, x = input_309_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor var_5037_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_5037_cast_fp16")]; tensor layers_25_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_25_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1116075264)))]; tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_up_proj_weight_to_fp16, x = input_309_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor input_313_cast_fp16 = mul(x = var_5037_cast_fp16, y = linear_180_cast_fp16)[name = string("input_313_cast_fp16")]; tensor layers_25_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_25_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122366784)))]; tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_mlp_down_proj_weight_to_fp16, x = input_313_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor hidden_states_1197_cast_fp16 = add(x = hidden_states_1187_cast_fp16, y = linear_181_cast_fp16)[name = string("hidden_states_1197_cast_fp16")]; int32 var_5054 = const()[name = string("op_5054"), val = int32(2)]; int32 var_5055 = const()[name = string("op_5055"), val = int32(-1)]; fp16 var_5054_promoted_to_fp16 = const()[name = string("op_5054_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5064_cast_fp16 = pow(x = hidden_states_1197_cast_fp16, y = var_5054_promoted_to_fp16)[name = string("op_5064_cast_fp16")]; tensor variance_209_axes_0 = const()[name = string("variance_209_axes_0"), val = tensor([-1])]; bool variance_209_keep_dims_0 = const()[name = string("variance_209_keep_dims_0"), val = bool(true)]; tensor variance_209_cast_fp16 = reduce_mean(axes = variance_209_axes_0, keep_dims = variance_209_keep_dims_0, x = var_5064_cast_fp16)[name = string("variance_209_cast_fp16")]; fp16 var_5067_to_fp16 = const()[name = string("op_5067_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5068_cast_fp16 = add(x = variance_209_cast_fp16, y = var_5067_to_fp16)[name = string("op_5068_cast_fp16")]; fp32 var_5069_epsilon_0 = const()[name = string("op_5069_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5069_cast_fp16 = rsqrt(epsilon = var_5069_epsilon_0, x = var_5068_cast_fp16)[name = string("op_5069_cast_fp16")]; tensor hidden_states_1201_cast_fp16 = mul(x = hidden_states_1197_cast_fp16, y = var_5069_cast_fp16)[name = string("hidden_states_1201_cast_fp16")]; tensor layers_26_input_layernorm_weight_to_fp16 = const()[name = string("layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1128658304)))]; tensor hidden_states_1205_cast_fp16 = mul(x = layers_26_input_layernorm_weight_to_fp16, y = hidden_states_1201_cast_fp16)[name = string("hidden_states_1205_cast_fp16")]; tensor var_5082_shape_cast_fp16 = shape(x = hidden_states_1205_cast_fp16)[name = string("op_5082_shape_cast_fp16")]; int32 gather_366 = const()[name = string("gather_366"), val = int32(1)]; int32 gather_367_axis_0 = const()[name = string("gather_367_axis_0"), val = int32(0)]; int32 gather_367_batch_dims_0 = const()[name = string("gather_367_batch_dims_0"), val = int32(0)]; bool gather_367_validate_indices_0 = const()[name = string("gather_367_validate_indices_0"), val = bool(false)]; string var_5082_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5082_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_367_indices_0_to_uint16 = const()[name = string("gather_367_indices_0_to_uint16"), val = uint16(1)]; tensor var_5082_shape_cast_fp16_to_uint16 = cast(dtype = var_5082_shape_cast_fp16_to_uint16_dtype_0, x = var_5082_shape_cast_fp16)[name = string("cast_237")]; uint16 gather_367_cast_uint16 = gather(axis = gather_367_axis_0, batch_dims = gather_367_batch_dims_0, indices = gather_367_indices_0_to_uint16, validate_indices = gather_367_validate_indices_0, x = var_5082_shape_cast_fp16_to_uint16)[name = string("gather_367_cast_uint16")]; string gather_367_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_367_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_26_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_26_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1128660416)))]; tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_26_self_attn_q_proj_weight_to_fp16, x = hidden_states_1205_cast_fp16)[name = string("linear_182_cast_fp16")]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1207_cast_fp16 = reshape(shape = concat_210x, x = linear_182_cast_fp16)[name = string("hidden_states_1207_cast_fp16")]; fp16 var_5054_promoted_1_to_fp16 = const()[name = string("op_5054_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_5090_cast_fp16 = pow(x = hidden_states_1207_cast_fp16, y = var_5054_promoted_1_to_fp16)[name = string("op_5090_cast_fp16")]; tensor variance_211_axes_0 = const()[name = string("variance_211_axes_0"), val = tensor([-1])]; bool variance_211_keep_dims_0 = const()[name = string("variance_211_keep_dims_0"), val = bool(true)]; tensor variance_211_cast_fp16 = reduce_mean(axes = variance_211_axes_0, keep_dims = variance_211_keep_dims_0, x = var_5090_cast_fp16)[name = string("variance_211_cast_fp16")]; fp16 var_5093_to_fp16 = const()[name = string("op_5093_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5094_cast_fp16 = add(x = variance_211_cast_fp16, y = var_5093_to_fp16)[name = string("op_5094_cast_fp16")]; fp32 var_5095_epsilon_0 = const()[name = string("op_5095_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5095_cast_fp16 = rsqrt(epsilon = var_5095_epsilon_0, x = var_5094_cast_fp16)[name = string("op_5095_cast_fp16")]; tensor hidden_states_1211_cast_fp16 = mul(x = hidden_states_1207_cast_fp16, y = var_5095_cast_fp16)[name = string("hidden_states_1211_cast_fp16")]; tensor layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1132854784)))]; tensor var_5098_cast_fp16 = mul(x = layers_26_self_attn_q_norm_weight_to_fp16, y = hidden_states_1211_cast_fp16)[name = string("op_5098_cast_fp16")]; tensor q_53_perm_0 = const()[name = string("q_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_26_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_26_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1132855104)))]; tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_k_proj_weight_to_fp16, x = hidden_states_1205_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1215_cast_fp16 = reshape(shape = concat_211x, x = linear_183_cast_fp16)[name = string("hidden_states_1215_cast_fp16")]; fp16 var_5054_promoted_2_to_fp16 = const()[name = string("op_5054_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_5106_cast_fp16 = pow(x = hidden_states_1215_cast_fp16, y = var_5054_promoted_2_to_fp16)[name = string("op_5106_cast_fp16")]; tensor variance_213_axes_0 = const()[name = string("variance_213_axes_0"), val = tensor([-1])]; bool variance_213_keep_dims_0 = const()[name = string("variance_213_keep_dims_0"), val = bool(true)]; tensor variance_213_cast_fp16 = reduce_mean(axes = variance_213_axes_0, keep_dims = variance_213_keep_dims_0, x = var_5106_cast_fp16)[name = string("variance_213_cast_fp16")]; fp16 var_5109_to_fp16 = const()[name = string("op_5109_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5110_cast_fp16 = add(x = variance_213_cast_fp16, y = var_5109_to_fp16)[name = string("op_5110_cast_fp16")]; fp32 var_5111_epsilon_0 = const()[name = string("op_5111_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5111_cast_fp16 = rsqrt(epsilon = var_5111_epsilon_0, x = var_5110_cast_fp16)[name = string("op_5111_cast_fp16")]; tensor hidden_states_1219_cast_fp16 = mul(x = hidden_states_1215_cast_fp16, y = var_5111_cast_fp16)[name = string("hidden_states_1219_cast_fp16")]; tensor layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1134952320)))]; tensor var_5114_cast_fp16 = mul(x = layers_26_self_attn_k_norm_weight_to_fp16, y = hidden_states_1219_cast_fp16)[name = string("op_5114_cast_fp16")]; tensor k_53_perm_0 = const()[name = string("k_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_26_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_26_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1134952640)))]; tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_v_proj_weight_to_fp16, x = hidden_states_1205_cast_fp16)[name = string("linear_184_cast_fp16")]; tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 8, 128])]; tensor var_5119_cast_fp16 = reshape(shape = concat_212x, x = linear_184_cast_fp16)[name = string("op_5119_cast_fp16")]; tensor hidden_states_1227_perm_0 = const()[name = string("hidden_states_1227_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_53_cast_fp16 = transpose(perm = q_53_perm_0, x = var_5098_cast_fp16)[name = string("transpose_7")]; tensor var_5123_cast_fp16 = mul(x = q_53_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5123_cast_fp16")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53_cast_fp16)[name = string("x1_105_cast_fp16")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53_cast_fp16)[name = string("x2_105_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5134_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_57_promoted_to_fp16)[name = string("op_5134_cast_fp16")]; bool var_5136_interleave_0 = const()[name = string("op_5136_interleave_0"), val = bool(false)]; tensor var_5136_cast_fp16 = concat(axis = var_5055, interleave = var_5136_interleave_0, values = (var_5134_cast_fp16, x1_105_cast_fp16))[name = string("op_5136_cast_fp16")]; tensor var_5137_cast_fp16 = mul(x = var_5136_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5137_cast_fp16")]; tensor query_53_cast_fp16 = add(x = var_5123_cast_fp16, y = var_5137_cast_fp16)[name = string("query_53_cast_fp16")]; tensor k_53_cast_fp16 = transpose(perm = k_53_perm_0, x = var_5114_cast_fp16)[name = string("transpose_6")]; tensor var_5139_cast_fp16 = mul(x = k_53_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5139_cast_fp16")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53_cast_fp16)[name = string("x1_107_cast_fp16")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53_cast_fp16)[name = string("x2_107_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5150_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_5150_cast_fp16")]; bool var_5152_interleave_0 = const()[name = string("op_5152_interleave_0"), val = bool(false)]; tensor var_5152_cast_fp16 = concat(axis = var_5055, interleave = var_5152_interleave_0, values = (var_5150_cast_fp16, x1_107_cast_fp16))[name = string("op_5152_cast_fp16")]; tensor var_5153_cast_fp16 = mul(x = var_5152_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5153_cast_fp16")]; tensor hidden_states_1223_cast_fp16 = add(x = var_5139_cast_fp16, y = var_5153_cast_fp16)[name = string("hidden_states_1223_cast_fp16")]; tensor var_5155_shape_cast_fp16 = shape(x = hidden_states_1223_cast_fp16)[name = string("op_5155_shape_cast_fp16")]; int32 gather_372 = const()[name = string("gather_372"), val = int32(1)]; int32 gather_373 = const()[name = string("gather_373"), val = int32(8)]; int32 gather_374_axis_0 = const()[name = string("gather_374_axis_0"), val = int32(0)]; int32 gather_374_batch_dims_0 = const()[name = string("gather_374_batch_dims_0"), val = int32(0)]; bool gather_374_validate_indices_0 = const()[name = string("gather_374_validate_indices_0"), val = bool(false)]; string var_5155_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5155_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_374_indices_0_to_uint16 = const()[name = string("gather_374_indices_0_to_uint16"), val = uint16(2)]; tensor var_5155_shape_cast_fp16_to_uint16 = cast(dtype = var_5155_shape_cast_fp16_to_uint16_dtype_0, x = var_5155_shape_cast_fp16)[name = string("cast_235")]; uint16 gather_374_cast_uint16 = gather(axis = gather_374_axis_0, batch_dims = gather_374_batch_dims_0, indices = gather_374_indices_0_to_uint16, validate_indices = gather_374_validate_indices_0, x = var_5155_shape_cast_fp16_to_uint16)[name = string("gather_374_cast_uint16")]; string gather_374_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_374_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_375 = const()[name = string("gather_375"), val = int32(128)]; tensor var_5162_axes_0 = const()[name = string("op_5162_axes_0"), val = tensor([2])]; tensor var_5162_cast_fp16 = expand_dims(axes = var_5162_axes_0, x = hidden_states_1223_cast_fp16)[name = string("op_5162_cast_fp16")]; int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; int32 gather_374_cast_uint16_to_int32 = cast(dtype = gather_374_cast_uint16_to_int32_dtype_0, x = gather_374_cast_uint16)[name = string("cast_234")]; tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_372, gather_373, var_5054, gather_374_cast_uint16_to_int32, gather_375))[name = string("concat_213")]; tensor shape_52_cast_fp16 = shape(x = var_5162_cast_fp16)[name = string("shape_52_cast_fp16")]; int32 equal_52_y_0 = const()[name = string("equal_52_y_0"), val = int32(-1)]; tensor equal_52 = equal(x = concat_213, y = equal_52_y_0)[name = string("equal_52")]; tensor select_52 = select(a = shape_52_cast_fp16, b = concat_213, cond = equal_52)[name = string("select_52")]; tensor real_div_52 = real_div(x = select_52, y = shape_52_cast_fp16)[name = string("real_div_52")]; tensor hidden_states_1225_cast_fp16 = tile(reps = real_div_52, x = var_5162_cast_fp16)[name = string("hidden_states_1225_cast_fp16")]; tensor concat_214x = const()[name = string("concat_214x"), val = tensor([1, 16, -1, 128])]; tensor key_states_53_cast_fp16 = reshape(shape = concat_214x, x = hidden_states_1225_cast_fp16)[name = string("key_states_53_cast_fp16")]; tensor hidden_states_1227_cast_fp16 = transpose(perm = hidden_states_1227_perm_0, x = var_5119_cast_fp16)[name = string("transpose_5")]; tensor var_5172_shape_cast_fp16 = shape(x = hidden_states_1227_cast_fp16)[name = string("op_5172_shape_cast_fp16")]; int32 gather_376 = const()[name = string("gather_376"), val = int32(1)]; int32 gather_377 = const()[name = string("gather_377"), val = int32(8)]; int32 gather_378_axis_0 = const()[name = string("gather_378_axis_0"), val = int32(0)]; int32 gather_378_batch_dims_0 = const()[name = string("gather_378_batch_dims_0"), val = int32(0)]; bool gather_378_validate_indices_0 = const()[name = string("gather_378_validate_indices_0"), val = bool(false)]; string var_5172_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5172_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_378_indices_0_to_uint16 = const()[name = string("gather_378_indices_0_to_uint16"), val = uint16(2)]; tensor var_5172_shape_cast_fp16_to_uint16 = cast(dtype = var_5172_shape_cast_fp16_to_uint16_dtype_0, x = var_5172_shape_cast_fp16)[name = string("cast_233")]; uint16 gather_378_cast_uint16 = gather(axis = gather_378_axis_0, batch_dims = gather_378_batch_dims_0, indices = gather_378_indices_0_to_uint16, validate_indices = gather_378_validate_indices_0, x = var_5172_shape_cast_fp16_to_uint16)[name = string("gather_378_cast_uint16")]; string gather_378_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_378_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_379 = const()[name = string("gather_379"), val = int32(128)]; tensor var_5179_axes_0 = const()[name = string("op_5179_axes_0"), val = tensor([2])]; tensor var_5179_cast_fp16 = expand_dims(axes = var_5179_axes_0, x = hidden_states_1227_cast_fp16)[name = string("op_5179_cast_fp16")]; int32 concat_215_axis_0 = const()[name = string("concat_215_axis_0"), val = int32(0)]; bool concat_215_interleave_0 = const()[name = string("concat_215_interleave_0"), val = bool(false)]; int32 gather_378_cast_uint16_to_int32 = cast(dtype = gather_378_cast_uint16_to_int32_dtype_0, x = gather_378_cast_uint16)[name = string("cast_232")]; tensor concat_215 = concat(axis = concat_215_axis_0, interleave = concat_215_interleave_0, values = (gather_376, gather_377, var_5054, gather_378_cast_uint16_to_int32, gather_379))[name = string("concat_215")]; tensor shape_53_cast_fp16 = shape(x = var_5179_cast_fp16)[name = string("shape_53_cast_fp16")]; int32 equal_53_y_0 = const()[name = string("equal_53_y_0"), val = int32(-1)]; tensor equal_53 = equal(x = concat_215, y = equal_53_y_0)[name = string("equal_53")]; tensor select_53 = select(a = shape_53_cast_fp16, b = concat_215, cond = equal_53)[name = string("select_53")]; tensor real_div_53 = real_div(x = select_53, y = shape_53_cast_fp16)[name = string("real_div_53")]; tensor hidden_states_1229_cast_fp16 = tile(reps = real_div_53, x = var_5179_cast_fp16)[name = string("hidden_states_1229_cast_fp16")]; tensor concat_216x = const()[name = string("concat_216x"), val = tensor([1, 16, -1, 128])]; tensor value_states_53_cast_fp16 = reshape(shape = concat_216x, x = hidden_states_1229_cast_fp16)[name = string("value_states_53_cast_fp16")]; bool var_5190_transpose_x_1 = const()[name = string("op_5190_transpose_x_1"), val = bool(false)]; bool var_5190_transpose_y_1 = const()[name = string("op_5190_transpose_y_1"), val = bool(true)]; tensor var_5190_cast_fp16 = matmul(transpose_x = var_5190_transpose_x_1, transpose_y = var_5190_transpose_y_1, x = query_53_cast_fp16, y = key_states_53_cast_fp16)[name = string("op_5190_cast_fp16")]; fp16 var_5191_to_fp16 = const()[name = string("op_5191_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_105_cast_fp16 = mul(x = var_5190_cast_fp16, y = var_5191_to_fp16)[name = string("attn_weights_105_cast_fp16")]; tensor input_315_cast_fp16 = add(x = attn_weights_105_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_315_cast_fp16")]; tensor var_5194_cast_fp16 = softmax(axis = var_5055, x = input_315_cast_fp16)[name = string("op_5194_cast_fp16")]; bool attn_output_105_transpose_x_0 = const()[name = string("attn_output_105_transpose_x_0"), val = bool(false)]; bool attn_output_105_transpose_y_0 = const()[name = string("attn_output_105_transpose_y_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = matmul(transpose_x = attn_output_105_transpose_x_0, transpose_y = attn_output_105_transpose_y_0, x = var_5194_cast_fp16, y = value_states_53_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_5198_perm_0 = const()[name = string("op_5198_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_217_axis_0 = const()[name = string("concat_217_axis_0"), val = int32(0)]; bool concat_217_interleave_0 = const()[name = string("concat_217_interleave_0"), val = bool(false)]; int32 gather_367_cast_uint16_to_int32 = cast(dtype = gather_367_cast_uint16_to_int32_dtype_0, x = gather_367_cast_uint16)[name = string("cast_236")]; tensor concat_217 = concat(axis = concat_217_axis_0, interleave = concat_217_interleave_0, values = (gather_366, gather_367_cast_uint16_to_int32, var_5055))[name = string("concat_217")]; tensor var_5198_cast_fp16 = transpose(perm = var_5198_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_4")]; tensor var_5201_cast_fp16 = reshape(shape = concat_217, x = var_5198_cast_fp16)[name = string("op_5201_cast_fp16")]; tensor layers_26_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_26_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1137049856)))]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_o_proj_weight_to_fp16, x = var_5201_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor hidden_states_1233_cast_fp16 = add(x = hidden_states_1197_cast_fp16, y = linear_185_cast_fp16)[name = string("hidden_states_1233_cast_fp16")]; fp16 var_5054_promoted_3_to_fp16 = const()[name = string("op_5054_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_5208_cast_fp16 = pow(x = hidden_states_1233_cast_fp16, y = var_5054_promoted_3_to_fp16)[name = string("op_5208_cast_fp16")]; tensor variance_215_axes_0 = const()[name = string("variance_215_axes_0"), val = tensor([-1])]; bool variance_215_keep_dims_0 = const()[name = string("variance_215_keep_dims_0"), val = bool(true)]; tensor variance_215_cast_fp16 = reduce_mean(axes = variance_215_axes_0, keep_dims = variance_215_keep_dims_0, x = var_5208_cast_fp16)[name = string("variance_215_cast_fp16")]; fp16 var_5211_to_fp16 = const()[name = string("op_5211_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5212_cast_fp16 = add(x = variance_215_cast_fp16, y = var_5211_to_fp16)[name = string("op_5212_cast_fp16")]; fp32 var_5213_epsilon_0 = const()[name = string("op_5213_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5213_cast_fp16 = rsqrt(epsilon = var_5213_epsilon_0, x = var_5212_cast_fp16)[name = string("op_5213_cast_fp16")]; tensor hidden_states_1237_cast_fp16 = mul(x = hidden_states_1233_cast_fp16, y = var_5213_cast_fp16)[name = string("hidden_states_1237_cast_fp16")]; tensor layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1141244224)))]; tensor input_321_cast_fp16 = mul(x = layers_26_post_attention_layernorm_weight_to_fp16, y = hidden_states_1237_cast_fp16)[name = string("input_321_cast_fp16")]; tensor layers_26_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_26_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1141246336)))]; tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_gate_proj_weight_to_fp16, x = input_321_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_5225_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_5225_cast_fp16")]; tensor layers_26_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_26_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1147537856)))]; tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_up_proj_weight_to_fp16, x = input_321_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor input_325_cast_fp16 = mul(x = var_5225_cast_fp16, y = linear_187_cast_fp16)[name = string("input_325_cast_fp16")]; tensor layers_26_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_26_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1153829376)))]; tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_mlp_down_proj_weight_to_fp16, x = input_325_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor hidden_states_1243_cast_fp16 = add(x = hidden_states_1233_cast_fp16, y = linear_188_cast_fp16)[name = string("hidden_states_1243_cast_fp16")]; int32 var_5242 = const()[name = string("op_5242"), val = int32(2)]; int32 var_5243 = const()[name = string("op_5243"), val = int32(-1)]; fp16 var_5242_promoted_to_fp16 = const()[name = string("op_5242_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5252_cast_fp16 = pow(x = hidden_states_1243_cast_fp16, y = var_5242_promoted_to_fp16)[name = string("op_5252_cast_fp16")]; tensor variance_217_axes_0 = const()[name = string("variance_217_axes_0"), val = tensor([-1])]; bool variance_217_keep_dims_0 = const()[name = string("variance_217_keep_dims_0"), val = bool(true)]; tensor variance_217_cast_fp16 = reduce_mean(axes = variance_217_axes_0, keep_dims = variance_217_keep_dims_0, x = var_5252_cast_fp16)[name = string("variance_217_cast_fp16")]; fp16 var_5255_to_fp16 = const()[name = string("op_5255_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5256_cast_fp16 = add(x = variance_217_cast_fp16, y = var_5255_to_fp16)[name = string("op_5256_cast_fp16")]; fp32 var_5257_epsilon_0 = const()[name = string("op_5257_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5257_cast_fp16 = rsqrt(epsilon = var_5257_epsilon_0, x = var_5256_cast_fp16)[name = string("op_5257_cast_fp16")]; tensor hidden_states_1247_cast_fp16 = mul(x = hidden_states_1243_cast_fp16, y = var_5257_cast_fp16)[name = string("hidden_states_1247_cast_fp16")]; tensor layers_27_input_layernorm_weight_to_fp16 = const()[name = string("layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1160120896)))]; tensor hidden_states_1251_cast_fp16 = mul(x = layers_27_input_layernorm_weight_to_fp16, y = hidden_states_1247_cast_fp16)[name = string("hidden_states_1251_cast_fp16")]; tensor var_5270_shape_cast_fp16 = shape(x = hidden_states_1251_cast_fp16)[name = string("op_5270_shape_cast_fp16")]; int32 gather_380 = const()[name = string("gather_380"), val = int32(1)]; int32 gather_381_axis_0 = const()[name = string("gather_381_axis_0"), val = int32(0)]; int32 gather_381_batch_dims_0 = const()[name = string("gather_381_batch_dims_0"), val = int32(0)]; bool gather_381_validate_indices_0 = const()[name = string("gather_381_validate_indices_0"), val = bool(false)]; string var_5270_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5270_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_381_indices_0_to_uint16 = const()[name = string("gather_381_indices_0_to_uint16"), val = uint16(1)]; tensor var_5270_shape_cast_fp16_to_uint16 = cast(dtype = var_5270_shape_cast_fp16_to_uint16_dtype_0, x = var_5270_shape_cast_fp16)[name = string("cast_231")]; uint16 gather_381_cast_uint16 = gather(axis = gather_381_axis_0, batch_dims = gather_381_batch_dims_0, indices = gather_381_indices_0_to_uint16, validate_indices = gather_381_validate_indices_0, x = var_5270_shape_cast_fp16_to_uint16)[name = string("gather_381_cast_uint16")]; string gather_381_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_381_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor layers_27_self_attn_q_proj_weight_to_fp16 = const()[name = string("layers_27_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1160123008)))]; tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_27_self_attn_q_proj_weight_to_fp16, x = hidden_states_1251_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor concat_218x = const()[name = string("concat_218x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1253_cast_fp16 = reshape(shape = concat_218x, x = linear_189_cast_fp16)[name = string("hidden_states_1253_cast_fp16")]; fp16 var_5242_promoted_1_to_fp16 = const()[name = string("op_5242_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_5278_cast_fp16 = pow(x = hidden_states_1253_cast_fp16, y = var_5242_promoted_1_to_fp16)[name = string("op_5278_cast_fp16")]; tensor variance_219_axes_0 = const()[name = string("variance_219_axes_0"), val = tensor([-1])]; bool variance_219_keep_dims_0 = const()[name = string("variance_219_keep_dims_0"), val = bool(true)]; tensor variance_219_cast_fp16 = reduce_mean(axes = variance_219_axes_0, keep_dims = variance_219_keep_dims_0, x = var_5278_cast_fp16)[name = string("variance_219_cast_fp16")]; fp16 var_5281_to_fp16 = const()[name = string("op_5281_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5282_cast_fp16 = add(x = variance_219_cast_fp16, y = var_5281_to_fp16)[name = string("op_5282_cast_fp16")]; fp32 var_5283_epsilon_0 = const()[name = string("op_5283_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5283_cast_fp16 = rsqrt(epsilon = var_5283_epsilon_0, x = var_5282_cast_fp16)[name = string("op_5283_cast_fp16")]; tensor hidden_states_1257_cast_fp16 = mul(x = hidden_states_1253_cast_fp16, y = var_5283_cast_fp16)[name = string("hidden_states_1257_cast_fp16")]; tensor layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164317376)))]; tensor var_5286_cast_fp16 = mul(x = layers_27_self_attn_q_norm_weight_to_fp16, y = hidden_states_1257_cast_fp16)[name = string("op_5286_cast_fp16")]; tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_27_self_attn_k_proj_weight_to_fp16 = const()[name = string("layers_27_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164317696)))]; tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_k_proj_weight_to_fp16, x = hidden_states_1251_cast_fp16)[name = string("linear_190_cast_fp16")]; tensor concat_219x = const()[name = string("concat_219x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1261_cast_fp16 = reshape(shape = concat_219x, x = linear_190_cast_fp16)[name = string("hidden_states_1261_cast_fp16")]; fp16 var_5242_promoted_2_to_fp16 = const()[name = string("op_5242_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_5294_cast_fp16 = pow(x = hidden_states_1261_cast_fp16, y = var_5242_promoted_2_to_fp16)[name = string("op_5294_cast_fp16")]; tensor variance_221_axes_0 = const()[name = string("variance_221_axes_0"), val = tensor([-1])]; bool variance_221_keep_dims_0 = const()[name = string("variance_221_keep_dims_0"), val = bool(true)]; tensor variance_221_cast_fp16 = reduce_mean(axes = variance_221_axes_0, keep_dims = variance_221_keep_dims_0, x = var_5294_cast_fp16)[name = string("variance_221_cast_fp16")]; fp16 var_5297_to_fp16 = const()[name = string("op_5297_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5298_cast_fp16 = add(x = variance_221_cast_fp16, y = var_5297_to_fp16)[name = string("op_5298_cast_fp16")]; fp32 var_5299_epsilon_0 = const()[name = string("op_5299_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5299_cast_fp16 = rsqrt(epsilon = var_5299_epsilon_0, x = var_5298_cast_fp16)[name = string("op_5299_cast_fp16")]; tensor hidden_states_1265_cast_fp16 = mul(x = hidden_states_1261_cast_fp16, y = var_5299_cast_fp16)[name = string("hidden_states_1265_cast_fp16")]; tensor layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1166414912)))]; tensor var_5302_cast_fp16 = mul(x = layers_27_self_attn_k_norm_weight_to_fp16, y = hidden_states_1265_cast_fp16)[name = string("op_5302_cast_fp16")]; tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_27_self_attn_v_proj_weight_to_fp16 = const()[name = string("layers_27_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1166415232)))]; tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_v_proj_weight_to_fp16, x = hidden_states_1251_cast_fp16)[name = string("linear_191_cast_fp16")]; tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 8, 128])]; tensor var_5307_cast_fp16 = reshape(shape = concat_220x, x = linear_191_cast_fp16)[name = string("op_5307_cast_fp16")]; tensor hidden_states_1273_perm_0 = const()[name = string("hidden_states_1273_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_5286_cast_fp16)[name = string("transpose_3")]; tensor var_5311_cast_fp16 = mul(x = q_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5311_cast_fp16")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_cast_fp16)[name = string("x1_109_cast_fp16")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_cast_fp16)[name = string("x2_109_cast_fp16")]; fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5322_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_5322_cast_fp16")]; bool var_5324_interleave_0 = const()[name = string("op_5324_interleave_0"), val = bool(false)]; tensor var_5324_cast_fp16 = concat(axis = var_5243, interleave = var_5324_interleave_0, values = (var_5322_cast_fp16, x1_109_cast_fp16))[name = string("op_5324_cast_fp16")]; tensor var_5325_cast_fp16 = mul(x = var_5324_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5325_cast_fp16")]; tensor query_cast_fp16 = add(x = var_5311_cast_fp16, y = var_5325_cast_fp16)[name = string("query_cast_fp16")]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_5302_cast_fp16)[name = string("transpose_2")]; tensor var_5327_cast_fp16 = mul(x = k_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5327_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5338_cast_fp16 = mul(x = x2_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_5338_cast_fp16")]; bool var_5340_interleave_0 = const()[name = string("op_5340_interleave_0"), val = bool(false)]; tensor var_5340_cast_fp16 = concat(axis = var_5243, interleave = var_5340_interleave_0, values = (var_5338_cast_fp16, x1_cast_fp16))[name = string("op_5340_cast_fp16")]; tensor var_5341_cast_fp16 = mul(x = var_5340_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5341_cast_fp16")]; tensor hidden_states_1269_cast_fp16 = add(x = var_5327_cast_fp16, y = var_5341_cast_fp16)[name = string("hidden_states_1269_cast_fp16")]; tensor var_5343_shape_cast_fp16 = shape(x = hidden_states_1269_cast_fp16)[name = string("op_5343_shape_cast_fp16")]; int32 gather_386 = const()[name = string("gather_386"), val = int32(1)]; int32 gather_387 = const()[name = string("gather_387"), val = int32(8)]; int32 gather_388_axis_0 = const()[name = string("gather_388_axis_0"), val = int32(0)]; int32 gather_388_batch_dims_0 = const()[name = string("gather_388_batch_dims_0"), val = int32(0)]; bool gather_388_validate_indices_0 = const()[name = string("gather_388_validate_indices_0"), val = bool(false)]; string var_5343_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5343_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_388_indices_0_to_uint16 = const()[name = string("gather_388_indices_0_to_uint16"), val = uint16(2)]; tensor var_5343_shape_cast_fp16_to_uint16 = cast(dtype = var_5343_shape_cast_fp16_to_uint16_dtype_0, x = var_5343_shape_cast_fp16)[name = string("cast_229")]; uint16 gather_388_cast_uint16 = gather(axis = gather_388_axis_0, batch_dims = gather_388_batch_dims_0, indices = gather_388_indices_0_to_uint16, validate_indices = gather_388_validate_indices_0, x = var_5343_shape_cast_fp16_to_uint16)[name = string("gather_388_cast_uint16")]; string gather_388_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_388_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_389 = const()[name = string("gather_389"), val = int32(128)]; tensor var_5350_axes_0 = const()[name = string("op_5350_axes_0"), val = tensor([2])]; tensor var_5350_cast_fp16 = expand_dims(axes = var_5350_axes_0, x = hidden_states_1269_cast_fp16)[name = string("op_5350_cast_fp16")]; int32 concat_221_axis_0 = const()[name = string("concat_221_axis_0"), val = int32(0)]; bool concat_221_interleave_0 = const()[name = string("concat_221_interleave_0"), val = bool(false)]; int32 gather_388_cast_uint16_to_int32 = cast(dtype = gather_388_cast_uint16_to_int32_dtype_0, x = gather_388_cast_uint16)[name = string("cast_228")]; tensor concat_221 = concat(axis = concat_221_axis_0, interleave = concat_221_interleave_0, values = (gather_386, gather_387, var_5242, gather_388_cast_uint16_to_int32, gather_389))[name = string("concat_221")]; tensor shape_54_cast_fp16 = shape(x = var_5350_cast_fp16)[name = string("shape_54_cast_fp16")]; int32 equal_54_y_0 = const()[name = string("equal_54_y_0"), val = int32(-1)]; tensor equal_54 = equal(x = concat_221, y = equal_54_y_0)[name = string("equal_54")]; tensor select_54 = select(a = shape_54_cast_fp16, b = concat_221, cond = equal_54)[name = string("select_54")]; tensor real_div_54 = real_div(x = select_54, y = shape_54_cast_fp16)[name = string("real_div_54")]; tensor hidden_states_1271_cast_fp16 = tile(reps = real_div_54, x = var_5350_cast_fp16)[name = string("hidden_states_1271_cast_fp16")]; tensor concat_222x = const()[name = string("concat_222x"), val = tensor([1, 16, -1, 128])]; tensor key_states_cast_fp16 = reshape(shape = concat_222x, x = hidden_states_1271_cast_fp16)[name = string("key_states_cast_fp16")]; tensor hidden_states_1273_cast_fp16 = transpose(perm = hidden_states_1273_perm_0, x = var_5307_cast_fp16)[name = string("transpose_1")]; tensor var_5360_shape_cast_fp16 = shape(x = hidden_states_1273_cast_fp16)[name = string("op_5360_shape_cast_fp16")]; int32 gather_390 = const()[name = string("gather_390"), val = int32(1)]; int32 gather_391 = const()[name = string("gather_391"), val = int32(8)]; int32 gather_392_axis_0 = const()[name = string("gather_392_axis_0"), val = int32(0)]; int32 gather_392_batch_dims_0 = const()[name = string("gather_392_batch_dims_0"), val = int32(0)]; bool gather_392_validate_indices_0 = const()[name = string("gather_392_validate_indices_0"), val = bool(false)]; string var_5360_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5360_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 gather_392_indices_0_to_uint16 = const()[name = string("gather_392_indices_0_to_uint16"), val = uint16(2)]; tensor var_5360_shape_cast_fp16_to_uint16 = cast(dtype = var_5360_shape_cast_fp16_to_uint16_dtype_0, x = var_5360_shape_cast_fp16)[name = string("cast_227")]; uint16 gather_392_cast_uint16 = gather(axis = gather_392_axis_0, batch_dims = gather_392_batch_dims_0, indices = gather_392_indices_0_to_uint16, validate_indices = gather_392_validate_indices_0, x = var_5360_shape_cast_fp16_to_uint16)[name = string("gather_392_cast_uint16")]; string gather_392_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_392_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_393 = const()[name = string("gather_393"), val = int32(128)]; tensor var_5367_axes_0 = const()[name = string("op_5367_axes_0"), val = tensor([2])]; tensor var_5367_cast_fp16 = expand_dims(axes = var_5367_axes_0, x = hidden_states_1273_cast_fp16)[name = string("op_5367_cast_fp16")]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; int32 gather_392_cast_uint16_to_int32 = cast(dtype = gather_392_cast_uint16_to_int32_dtype_0, x = gather_392_cast_uint16)[name = string("cast_226")]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (gather_390, gather_391, var_5242, gather_392_cast_uint16_to_int32, gather_393))[name = string("concat_223")]; tensor shape_55_cast_fp16 = shape(x = var_5367_cast_fp16)[name = string("shape_55_cast_fp16")]; int32 equal_55_y_0 = const()[name = string("equal_55_y_0"), val = int32(-1)]; tensor equal_55 = equal(x = concat_223, y = equal_55_y_0)[name = string("equal_55")]; tensor select_55 = select(a = shape_55_cast_fp16, b = concat_223, cond = equal_55)[name = string("select_55")]; tensor real_div_55 = real_div(x = select_55, y = shape_55_cast_fp16)[name = string("real_div_55")]; tensor hidden_states_1275_cast_fp16 = tile(reps = real_div_55, x = var_5367_cast_fp16)[name = string("hidden_states_1275_cast_fp16")]; tensor concat_224x = const()[name = string("concat_224x"), val = tensor([1, 16, -1, 128])]; tensor value_states_cast_fp16 = reshape(shape = concat_224x, x = hidden_states_1275_cast_fp16)[name = string("value_states_cast_fp16")]; bool var_5378_transpose_x_1 = const()[name = string("op_5378_transpose_x_1"), val = bool(false)]; bool var_5378_transpose_y_1 = const()[name = string("op_5378_transpose_y_1"), val = bool(true)]; tensor var_5378_cast_fp16 = matmul(transpose_x = var_5378_transpose_x_1, transpose_y = var_5378_transpose_y_1, x = query_cast_fp16, y = key_states_cast_fp16)[name = string("op_5378_cast_fp16")]; fp16 var_5379_to_fp16 = const()[name = string("op_5379_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_109_cast_fp16 = mul(x = var_5378_cast_fp16, y = var_5379_to_fp16)[name = string("attn_weights_109_cast_fp16")]; tensor input_327_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = attention_mask_cast_fp16)[name = string("input_327_cast_fp16")]; tensor var_5382_cast_fp16 = softmax(axis = var_5243, x = input_327_cast_fp16)[name = string("op_5382_cast_fp16")]; bool attn_output_109_transpose_x_0 = const()[name = string("attn_output_109_transpose_x_0"), val = bool(false)]; bool attn_output_109_transpose_y_0 = const()[name = string("attn_output_109_transpose_y_0"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_0, transpose_y = attn_output_109_transpose_y_0, x = var_5382_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_109_cast_fp16")]; tensor var_5386_perm_0 = const()[name = string("op_5386_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; int32 gather_381_cast_uint16_to_int32 = cast(dtype = gather_381_cast_uint16_to_int32_dtype_0, x = gather_381_cast_uint16)[name = string("cast_230")]; tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (gather_380, gather_381_cast_uint16_to_int32, var_5243))[name = string("concat_225")]; tensor var_5386_cast_fp16 = transpose(perm = var_5386_perm_0, x = attn_output_109_cast_fp16)[name = string("transpose_0")]; tensor var_5389_cast_fp16 = reshape(shape = concat_225, x = var_5386_cast_fp16)[name = string("op_5389_cast_fp16")]; tensor layers_27_self_attn_o_proj_weight_to_fp16 = const()[name = string("layers_27_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168512448)))]; tensor linear_192_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_o_proj_weight_to_fp16, x = var_5389_cast_fp16)[name = string("linear_192_cast_fp16")]; tensor hidden_states_1279_cast_fp16 = add(x = hidden_states_1243_cast_fp16, y = linear_192_cast_fp16)[name = string("hidden_states_1279_cast_fp16")]; fp16 var_5242_promoted_3_to_fp16 = const()[name = string("op_5242_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_5396_cast_fp16 = pow(x = hidden_states_1279_cast_fp16, y = var_5242_promoted_3_to_fp16)[name = string("op_5396_cast_fp16")]; tensor variance_223_axes_0 = const()[name = string("variance_223_axes_0"), val = tensor([-1])]; bool variance_223_keep_dims_0 = const()[name = string("variance_223_keep_dims_0"), val = bool(true)]; tensor variance_223_cast_fp16 = reduce_mean(axes = variance_223_axes_0, keep_dims = variance_223_keep_dims_0, x = var_5396_cast_fp16)[name = string("variance_223_cast_fp16")]; fp16 var_5399_to_fp16 = const()[name = string("op_5399_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5400_cast_fp16 = add(x = variance_223_cast_fp16, y = var_5399_to_fp16)[name = string("op_5400_cast_fp16")]; fp32 var_5401_epsilon_0 = const()[name = string("op_5401_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5401_cast_fp16 = rsqrt(epsilon = var_5401_epsilon_0, x = var_5400_cast_fp16)[name = string("op_5401_cast_fp16")]; tensor hidden_states_1283_cast_fp16 = mul(x = hidden_states_1279_cast_fp16, y = var_5401_cast_fp16)[name = string("hidden_states_1283_cast_fp16")]; tensor layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1172706816)))]; tensor input_333_cast_fp16 = mul(x = layers_27_post_attention_layernorm_weight_to_fp16, y = hidden_states_1283_cast_fp16)[name = string("input_333_cast_fp16")]; tensor layers_27_mlp_gate_proj_weight_to_fp16 = const()[name = string("layers_27_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1172708928)))]; tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_gate_proj_weight_to_fp16, x = input_333_cast_fp16)[name = string("linear_193_cast_fp16")]; tensor var_5413_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_5413_cast_fp16")]; tensor layers_27_mlp_up_proj_weight_to_fp16 = const()[name = string("layers_27_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1179000448)))]; tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_up_proj_weight_to_fp16, x = input_333_cast_fp16)[name = string("linear_194_cast_fp16")]; tensor input_cast_fp16 = mul(x = var_5413_cast_fp16, y = linear_194_cast_fp16)[name = string("input_cast_fp16")]; tensor layers_27_mlp_down_proj_weight_to_fp16 = const()[name = string("layers_27_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1185291968)))]; tensor linear_195_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_mlp_down_proj_weight_to_fp16, x = input_cast_fp16)[name = string("linear_195_cast_fp16")]; tensor hidden_states_1289_cast_fp16 = add(x = hidden_states_1279_cast_fp16, y = linear_195_cast_fp16)[name = string("hidden_states_1289_cast_fp16")]; fp16 var_5423_promoted_to_fp16 = const()[name = string("op_5423_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5429_cast_fp16 = pow(x = hidden_states_1289_cast_fp16, y = var_5423_promoted_to_fp16)[name = string("op_5429_cast_fp16")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_5429_cast_fp16)[name = string("variance_cast_fp16")]; fp16 var_5432_to_fp16 = const()[name = string("op_5432_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5433_cast_fp16 = add(x = variance_cast_fp16, y = var_5432_to_fp16)[name = string("op_5433_cast_fp16")]; fp32 var_5434_epsilon_0 = const()[name = string("op_5434_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5434_cast_fp16 = rsqrt(epsilon = var_5434_epsilon_0, x = var_5433_cast_fp16)[name = string("op_5434_cast_fp16")]; tensor hidden_states_1293_cast_fp16 = mul(x = hidden_states_1289_cast_fp16, y = var_5434_cast_fp16)[name = string("hidden_states_1293_cast_fp16")]; tensor norm_weight_to_fp16 = const()[name = string("norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191583488)))]; tensor hidden_states = mul(x = norm_weight_to_fp16, y = hidden_states_1293_cast_fp16)[name = string("op_5437_cast_fp16")]; } -> (hidden_states); }