diff --git "a/qwen3_tts/code_decoder/12hz-0.6b-customvoice/W8A16-stateful/CodeDecoder.mlmodelc/model.mil" "b/qwen3_tts/code_decoder/12hz-0.6b-customvoice/W8A16-stateful/CodeDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/qwen3_tts/code_decoder/12hz-0.6b-customvoice/W8A16-stateful/CodeDecoder.mlmodelc/model.mil" @@ -0,0 +1,6532 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func main(tensor cache_length, tensor input_embeds, tensor key_padding_mask, tensor kv_cache_update_mask, state> self_attn_key_cache, state> self_attn_value_cache) { + int32 pos_cos_batch_dims_0 = const()[name = string("pos_cos_batch_dims_0"), val = int32(0)]; + bool pos_cos_validate_indices_0 = const()[name = string("pos_cos_validate_indices_0"), val = bool(false)]; + tensor position_embeddings_cos_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32896))))[name = string("position_embeddings_cos_weight_to_fp16_palettized")]; + string cache_length_to_int16_dtype_0 = const()[name = string("cache_length_to_int16_dtype_0"), val = string("int16")]; + string cast_572_dtype_0 = const()[name = string("cast_572_dtype_0"), val = string("int32")]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor cache_length_to_int16 = cast(dtype = cache_length_to_int16_dtype_0, x = cache_length)[name = string("cast_5")]; + tensor cast_572 = cast(dtype = cast_572_dtype_0, x = cache_length_to_int16)[name = string("cast_4")]; + tensor greater_equal_0 = greater_equal(x = cast_572, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(256)]; + tensor add_0 = add(x = cast_572, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = cast_572, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; + string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; + tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; + tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(256)]; + tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; + tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(0)]; + tensor pos_cos_cast_fp16_cast_uint16_cast_uint16 = gather(axis = pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = pos_cos_batch_dims_0, indices = select_0_1, validate_indices = pos_cos_validate_indices_0, x = position_embeddings_cos_weight_to_fp16_palettized)[name = string("pos_cos_cast_fp16_cast_uint16_cast_uint16")]; + tensor obj_7_axes_0 = const()[name = string("obj_7_axes_0"), val = tensor([2])]; + tensor obj_7_cast_fp16 = expand_dims(axes = obj_7_axes_0, x = pos_cos_cast_fp16_cast_uint16_cast_uint16)[name = string("obj_7_cast_fp16")]; + int32 pos_sin_axis_0 = const()[name = string("pos_sin_axis_0"), val = int32(0)]; + int32 pos_sin_batch_dims_0 = const()[name = string("pos_sin_batch_dims_0"), val = int32(0)]; + bool pos_sin_validate_indices_0 = const()[name = string("pos_sin_validate_indices_0"), val = bool(false)]; + tensor position_embeddings_sin_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66304))))[name = string("position_embeddings_sin_weight_to_fp16_palettized")]; + string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")]; + tensor cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_1")]; + tensor pos_sin_cast_fp16_cast_uint16 = gather(axis = pos_sin_axis_0, batch_dims = pos_sin_batch_dims_0, indices = cache_length_to_uint16, validate_indices = pos_sin_validate_indices_0, x = position_embeddings_sin_weight_to_fp16_palettized)[name = string("pos_sin_cast_fp16_cast_uint16")]; + tensor obj_9_axes_0 = const()[name = string("obj_9_axes_0"), val = tensor([2])]; + tensor obj_9_cast_fp16 = expand_dims(axes = obj_9_axes_0, x = pos_sin_cast_fp16_cast_uint16)[name = string("obj_9_cast_fp16")]; + tensor read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66880)))]; + int32 var_101_axis_0 = const()[name = string("op_101_axis_0"), val = int32(1)]; + tensor var_101_cast_fp16_0, tensor var_101_cast_fp16_1, tensor var_101_cast_fp16_2, tensor var_101_cast_fp16_3, tensor var_101_cast_fp16_4, tensor var_101_cast_fp16_5, tensor var_101_cast_fp16_6, tensor var_101_cast_fp16_7, tensor var_101_cast_fp16_8, tensor var_101_cast_fp16_9, tensor var_101_cast_fp16_10, tensor var_101_cast_fp16_11, tensor var_101_cast_fp16_12, tensor var_101_cast_fp16_13, tensor var_101_cast_fp16_14, tensor var_101_cast_fp16_15, tensor var_101_cast_fp16_16, tensor var_101_cast_fp16_17, tensor var_101_cast_fp16_18, tensor var_101_cast_fp16_19, tensor var_101_cast_fp16_20, tensor var_101_cast_fp16_21, tensor var_101_cast_fp16_22, tensor var_101_cast_fp16_23, tensor var_101_cast_fp16_24, tensor var_101_cast_fp16_25, tensor var_101_cast_fp16_26, tensor var_101_cast_fp16_27 = split(axis = var_101_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_101_cast_fp16")]; + tensor read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67072)))]; + int32 var_132_axis_0 = const()[name = string("op_132_axis_0"), val = int32(1)]; + tensor var_132_cast_fp16_0, tensor var_132_cast_fp16_1, tensor var_132_cast_fp16_2, tensor var_132_cast_fp16_3, tensor var_132_cast_fp16_4, tensor var_132_cast_fp16_5, tensor var_132_cast_fp16_6, tensor var_132_cast_fp16_7, tensor var_132_cast_fp16_8, tensor var_132_cast_fp16_9, tensor var_132_cast_fp16_10, tensor var_132_cast_fp16_11, tensor var_132_cast_fp16_12, tensor var_132_cast_fp16_13, tensor var_132_cast_fp16_14, tensor var_132_cast_fp16_15, tensor var_132_cast_fp16_16, tensor var_132_cast_fp16_17, tensor var_132_cast_fp16_18, tensor var_132_cast_fp16_19, tensor var_132_cast_fp16_20, tensor var_132_cast_fp16_21, tensor var_132_cast_fp16_22, tensor var_132_cast_fp16_23, tensor var_132_cast_fp16_24, tensor var_132_cast_fp16_25, tensor var_132_cast_fp16_26, tensor var_132_cast_fp16_27 = split(axis = var_132_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_132_cast_fp16")]; + int32 var_162 = const()[name = string("op_162"), val = int32(3)]; + int32 var_172 = const()[name = string("op_172"), val = int32(-2)]; + int32 var_180 = const()[name = string("op_180"), val = int32(1)]; + tensor inputs_sq_1_cast_fp16 = mul(x = input_embeds, y = input_embeds)[name = string("inputs_sq_1_cast_fp16")]; + tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([1])]; + bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; + tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = inputs_sq_1_cast_fp16)[name = string("variance_1_cast_fp16")]; + fp16 var_192_to_fp16 = const()[name = string("op_192_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_193_cast_fp16 = add(x = variance_1_cast_fp16, y = var_192_to_fp16)[name = string("op_193_cast_fp16")]; + fp32 var_194_epsilon_0 = const()[name = string("op_194_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_194_cast_fp16 = rsqrt(epsilon = var_194_epsilon_0, x = var_193_cast_fp16)[name = string("op_194_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = mul(x = input_embeds, y = var_194_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; + tensor w_1_to_fp16 = const()[name = string("w_1_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67264)))]; + tensor obj_1_cast_fp16 = mul(x = w_1_to_fp16, y = hidden_states_1_cast_fp16)[name = string("obj_1_cast_fp16")]; + string query_1_pad_type_0 = const()[name = string("query_1_pad_type_0"), val = string("valid")]; + tensor query_1_strides_0 = const()[name = string("query_1_strides_0"), val = tensor([1, 1])]; + tensor query_1_pad_0 = const()[name = string("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_1_dilations_0 = const()[name = string("query_1_dilations_0"), val = tensor([1, 1])]; + int32 query_1_groups_0 = const()[name = string("query_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2166592))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2167168)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("query_1_cast_fp16")]; + string current_key_1_pad_type_0 = const()[name = string("current_key_1_pad_type_0"), val = string("valid")]; + tensor current_key_1_strides_0 = const()[name = string("current_key_1_strides_0"), val = tensor([1, 1])]; + tensor current_key_1_pad_0 = const()[name = string("current_key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_1_dilations_0 = const()[name = string("current_key_1_dilations_0"), val = tensor([1, 1])]; + int32 current_key_1_groups_0 = const()[name = string("current_key_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2171328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3219968))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_1_cast_fp16 = conv(dilations = current_key_1_dilations_0, groups = current_key_1_groups_0, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = current_key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("current_key_1_cast_fp16")]; + string current_value_1_pad_type_0 = const()[name = string("current_value_1_pad_type_0"), val = string("valid")]; + tensor current_value_1_strides_0 = const()[name = string("current_value_1_strides_0"), val = tensor([1, 1])]; + tensor current_value_1_pad_0 = const()[name = string("current_value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_1_dilations_0 = const()[name = string("current_value_1_dilations_0"), val = tensor([1, 1])]; + int32 current_value_1_groups_0 = const()[name = string("current_value_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3220544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4269184))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4269760)))]; + tensor current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_1_dilations_0, groups = current_value_1_groups_0, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = current_value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("current_value_1_cast_fp16")]; + tensor var_231 = const()[name = string("op_231"), val = tensor([16, 128, 1, 1])]; + tensor inputs_1_cast_fp16 = reshape(shape = var_231, x = query_1_cast_fp16)[name = string("inputs_1_cast_fp16")]; + tensor inputs_sq_3_cast_fp16 = mul(x = inputs_1_cast_fp16, y = inputs_1_cast_fp16)[name = string("inputs_sq_3_cast_fp16")]; + tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([1])]; + bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = inputs_sq_3_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_237_to_fp16 = const()[name = string("op_237_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_238_cast_fp16 = add(x = variance_3_cast_fp16, y = var_237_to_fp16)[name = string("op_238_cast_fp16")]; + fp32 var_239_epsilon_0 = const()[name = string("op_239_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_239_cast_fp16 = rsqrt(epsilon = var_239_epsilon_0, x = var_238_cast_fp16)[name = string("op_239_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_1_cast_fp16, y = var_239_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor w_3_to_fp16 = const()[name = string("w_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4271872)))]; + tensor query_normed_1_cast_fp16 = mul(x = w_3_to_fp16, y = hidden_states_3_cast_fp16)[name = string("query_normed_1_cast_fp16")]; + tensor var_247 = const()[name = string("op_247"), val = tensor([8, 128, 1, 1])]; + tensor inputs_3_cast_fp16 = reshape(shape = var_247, x = current_key_1_cast_fp16)[name = string("inputs_3_cast_fp16")]; + tensor inputs_sq_5_cast_fp16 = mul(x = inputs_3_cast_fp16, y = inputs_3_cast_fp16)[name = string("inputs_sq_5_cast_fp16")]; + tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([1])]; + bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = inputs_sq_5_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_253_to_fp16 = const()[name = string("op_253_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_254_cast_fp16 = add(x = variance_5_cast_fp16, y = var_253_to_fp16)[name = string("op_254_cast_fp16")]; + fp32 var_255_epsilon_0 = const()[name = string("op_255_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_255_cast_fp16 = rsqrt(epsilon = var_255_epsilon_0, x = var_254_cast_fp16)[name = string("op_255_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = mul(x = inputs_3_cast_fp16, y = var_255_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor w_5_to_fp16 = const()[name = string("w_5_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4272192)))]; + tensor current_key_normed_1_cast_fp16 = mul(x = w_5_to_fp16, y = hidden_states_5_cast_fp16)[name = string("current_key_normed_1_cast_fp16")]; + tensor var_273 = const()[name = string("op_273"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_273, x = query_normed_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; + tensor var_275 = const()[name = string("op_275"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_1_cast_fp16 = reshape(shape = var_275, x = current_key_normed_1_cast_fp16)[name = string("mh_k_1_cast_fp16")]; + tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([1])]; + tensor cos_1_cast_fp16 = expand_dims(axes = cos_1_axes_0, x = obj_7_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([1])]; + tensor sin_1_cast_fp16 = expand_dims(axes = sin_1_axes_0, x = obj_9_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor var_279_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_279_cast_fp16")]; + tensor var_284_begin_0 = const()[name = string("op_284_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_284_end_0 = const()[name = string("op_284_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_284_end_mask_0 = const()[name = string("op_284_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_284_cast_fp16 = slice_by_index(begin = var_284_begin_0, end = var_284_end_0, end_mask = var_284_end_mask_0, x = mh_q_1_cast_fp16)[name = string("op_284_cast_fp16")]; + tensor var_290_begin_0 = const()[name = string("op_290_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_290_end_0 = const()[name = string("op_290_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_290_end_mask_0 = const()[name = string("op_290_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = mh_q_1_cast_fp16)[name = string("op_290_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_292_cast_fp16 = mul(x = var_290_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_292_cast_fp16")]; + bool var_294_interleave_0 = const()[name = string("op_294_interleave_0"), val = bool(false)]; + tensor var_294_cast_fp16 = concat(axis = var_172, interleave = var_294_interleave_0, values = (var_292_cast_fp16, var_284_cast_fp16))[name = string("op_294_cast_fp16")]; + tensor var_295_cast_fp16 = mul(x = var_294_cast_fp16, y = sin_1_cast_fp16)[name = string("op_295_cast_fp16")]; + tensor mh_q_3_cast_fp16 = add(x = var_279_cast_fp16, y = var_295_cast_fp16)[name = string("mh_q_3_cast_fp16")]; + tensor var_297_cast_fp16 = mul(x = mh_k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_297_cast_fp16")]; + tensor var_302_begin_0 = const()[name = string("op_302_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_302_end_0 = const()[name = string("op_302_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_302_end_mask_0 = const()[name = string("op_302_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = mh_k_1_cast_fp16)[name = string("op_302_cast_fp16")]; + tensor var_308_begin_0 = const()[name = string("op_308_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_308_end_0 = const()[name = string("op_308_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_308_end_mask_0 = const()[name = string("op_308_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = mh_k_1_cast_fp16)[name = string("op_308_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_310_cast_fp16 = mul(x = var_308_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_310_cast_fp16")]; + bool var_312_interleave_0 = const()[name = string("op_312_interleave_0"), val = bool(false)]; + tensor var_312_cast_fp16 = concat(axis = var_172, interleave = var_312_interleave_0, values = (var_310_cast_fp16, var_302_cast_fp16))[name = string("op_312_cast_fp16")]; + tensor var_313_cast_fp16 = mul(x = var_312_cast_fp16, y = sin_1_cast_fp16)[name = string("op_313_cast_fp16")]; + tensor mh_k_3_cast_fp16 = add(x = var_297_cast_fp16, y = var_313_cast_fp16)[name = string("mh_k_3_cast_fp16")]; + tensor var_317 = const()[name = string("op_317"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_3_cast_fp16 = reshape(shape = var_317, x = mh_k_3_cast_fp16)[name = string("current_key_3_cast_fp16")]; + tensor var_320_axes_0 = const()[name = string("op_320_axes_0"), val = tensor([1])]; + tensor var_320_cast_fp16 = expand_dims(axes = var_320_axes_0, x = kv_cache_update_mask)[name = string("op_320_cast_fp16")]; + tensor var_321_axes_0 = const()[name = string("op_321_axes_0"), val = tensor([2])]; + tensor var_321_cast_fp16 = expand_dims(axes = var_321_axes_0, x = var_320_cast_fp16)[name = string("op_321_cast_fp16")]; + fp16 var_173_to_fp16 = const()[name = string("op_173_to_fp16"), val = fp16(0x1p+0)]; + tensor var_323_cast_fp16 = sub(x = var_173_to_fp16, y = var_321_cast_fp16)[name = string("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = var_101_cast_fp16_0, y = var_323_cast_fp16)[name = string("op_324_cast_fp16")]; + tensor var_325_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_321_cast_fp16)[name = string("op_325_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_324_cast_fp16, y = var_325_cast_fp16)[name = string("key_3_cast_fp16")]; + tensor var_328_cast_fp16 = mul(x = var_132_cast_fp16_0, y = var_323_cast_fp16)[name = string("op_328_cast_fp16")]; + tensor var_329_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_321_cast_fp16)[name = string("op_329_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_328_cast_fp16, y = var_329_cast_fp16)[name = string("value_1_cast_fp16")]; + tensor var_333 = const()[name = string("op_333"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_1_cast_fp16 = reshape(shape = var_333, x = key_3_cast_fp16)[name = string("key_heads_1_cast_fp16")]; + tensor var_335 = const()[name = string("op_335"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_1_cast_fp16 = reshape(shape = var_335, x = value_1_cast_fp16)[name = string("value_heads_1_cast_fp16")]; + tensor var_338_begin_0 = const()[name = string("op_338_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_338_end_0 = const()[name = string("op_338_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_338_end_mask_0 = const()[name = string("op_338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_338_cast_fp16 = slice_by_index(begin = var_338_begin_0, end = var_338_end_0, end_mask = var_338_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_338_cast_fp16")]; + tensor var_342_begin_0 = const()[name = string("op_342_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_342_end_0 = const()[name = string("op_342_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_342_end_mask_0 = const()[name = string("op_342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_342_cast_fp16 = slice_by_index(begin = var_342_begin_0, end = var_342_end_0, end_mask = var_342_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_342_cast_fp16")]; + tensor var_354_begin_0 = const()[name = string("op_354_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_354_end_0 = const()[name = string("op_354_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_354_end_mask_0 = const()[name = string("op_354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_354_cast_fp16 = slice_by_index(begin = var_354_begin_0, end = var_354_end_0, end_mask = var_354_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_354_cast_fp16")]; + tensor var_358_begin_0 = const()[name = string("op_358_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_358_end_0 = const()[name = string("op_358_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_358_end_mask_0 = const()[name = string("op_358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_358_cast_fp16 = slice_by_index(begin = var_358_begin_0, end = var_358_end_0, end_mask = var_358_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_358_cast_fp16")]; + tensor var_370_begin_0 = const()[name = string("op_370_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_370_end_0 = const()[name = string("op_370_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_370_end_mask_0 = const()[name = string("op_370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_370_cast_fp16 = slice_by_index(begin = var_370_begin_0, end = var_370_end_0, end_mask = var_370_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_370_cast_fp16")]; + tensor var_374_begin_0 = const()[name = string("op_374_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_374_end_0 = const()[name = string("op_374_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_374_end_mask_0 = const()[name = string("op_374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_374_cast_fp16")]; + tensor var_386_begin_0 = const()[name = string("op_386_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_386_end_0 = const()[name = string("op_386_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_386_end_mask_0 = const()[name = string("op_386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_386_cast_fp16 = slice_by_index(begin = var_386_begin_0, end = var_386_end_0, end_mask = var_386_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_386_cast_fp16")]; + tensor var_390_begin_0 = const()[name = string("op_390_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_390_end_0 = const()[name = string("op_390_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_390_end_mask_0 = const()[name = string("op_390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_390_cast_fp16 = slice_by_index(begin = var_390_begin_0, end = var_390_end_0, end_mask = var_390_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_390_cast_fp16")]; + tensor var_402_begin_0 = const()[name = string("op_402_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_402_end_0 = const()[name = string("op_402_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_402_end_mask_0 = const()[name = string("op_402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_402_cast_fp16 = slice_by_index(begin = var_402_begin_0, end = var_402_end_0, end_mask = var_402_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_402_cast_fp16")]; + tensor var_406_begin_0 = const()[name = string("op_406_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_406_end_0 = const()[name = string("op_406_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_406_end_mask_0 = const()[name = string("op_406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_406_cast_fp16 = slice_by_index(begin = var_406_begin_0, end = var_406_end_0, end_mask = var_406_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_406_cast_fp16")]; + tensor var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_418_end_0 = const()[name = string("op_418_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = var_418_end_0, end_mask = var_418_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor var_422_begin_0 = const()[name = string("op_422_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_422_end_0 = const()[name = string("op_422_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_422_end_mask_0 = const()[name = string("op_422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_422_cast_fp16 = slice_by_index(begin = var_422_begin_0, end = var_422_end_0, end_mask = var_422_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_422_cast_fp16")]; + tensor var_434_begin_0 = const()[name = string("op_434_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_434_end_0 = const()[name = string("op_434_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_434_end_mask_0 = const()[name = string("op_434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_434_cast_fp16 = slice_by_index(begin = var_434_begin_0, end = var_434_end_0, end_mask = var_434_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_434_cast_fp16")]; + tensor var_438_begin_0 = const()[name = string("op_438_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_438_end_0 = const()[name = string("op_438_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_438_end_mask_0 = const()[name = string("op_438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_438_cast_fp16 = slice_by_index(begin = var_438_begin_0, end = var_438_end_0, end_mask = var_438_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_438_cast_fp16")]; + tensor var_450_begin_0 = const()[name = string("op_450_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_450_end_0 = const()[name = string("op_450_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_450_end_mask_0 = const()[name = string("op_450_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_450_cast_fp16 = slice_by_index(begin = var_450_begin_0, end = var_450_end_0, end_mask = var_450_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_450_cast_fp16")]; + tensor var_454_begin_0 = const()[name = string("op_454_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_454_end_0 = const()[name = string("op_454_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_454_end_mask_0 = const()[name = string("op_454_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_454_cast_fp16 = slice_by_index(begin = var_454_begin_0, end = var_454_end_0, end_mask = var_454_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_454_cast_fp16")]; + bool key_heads_3_interleave_0 = const()[name = string("key_heads_3_interleave_0"), val = bool(false)]; + tensor key_heads_3_cast_fp16 = concat(axis = var_180, interleave = key_heads_3_interleave_0, values = (var_338_cast_fp16, var_338_cast_fp16, var_354_cast_fp16, var_354_cast_fp16, var_370_cast_fp16, var_370_cast_fp16, var_386_cast_fp16, var_386_cast_fp16, var_402_cast_fp16, var_402_cast_fp16, var_418_cast_fp16, var_418_cast_fp16, var_434_cast_fp16, var_434_cast_fp16, var_450_cast_fp16, var_450_cast_fp16))[name = string("key_heads_3_cast_fp16")]; + bool value_heads_3_interleave_0 = const()[name = string("value_heads_3_interleave_0"), val = bool(false)]; + tensor value_heads_3_cast_fp16 = concat(axis = var_180, interleave = value_heads_3_interleave_0, values = (var_342_cast_fp16, var_342_cast_fp16, var_358_cast_fp16, var_358_cast_fp16, var_374_cast_fp16, var_374_cast_fp16, var_390_cast_fp16, var_390_cast_fp16, var_406_cast_fp16, var_406_cast_fp16, var_422_cast_fp16, var_422_cast_fp16, var_438_cast_fp16, var_438_cast_fp16, var_454_cast_fp16, var_454_cast_fp16))[name = string("value_heads_3_cast_fp16")]; + fp16 var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_478_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_477_to_fp16)[name = string("op_478_cast_fp16")]; + bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; + bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_478_cast_fp16, y = key_heads_3_cast_fp16)[name = string("mh_w_1_cast_fp16")]; + tensor var_486_axes_0 = const()[name = string("op_486_axes_0"), val = tensor([1])]; + tensor var_486_cast_fp16 = expand_dims(axes = var_486_axes_0, x = key_padding_mask)[name = string("op_486_cast_fp16")]; + tensor var_487_axes_0 = const()[name = string("op_487_axes_0"), val = tensor([2])]; + tensor var_487_cast_fp16 = expand_dims(axes = var_487_axes_0, x = var_486_cast_fp16)[name = string("op_487_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_3_cast_fp16")]; + tensor var_490_cast_fp16 = softmax(axis = var_162, x = mh_w_3_cast_fp16)[name = string("op_490_cast_fp16")]; + bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; + bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = value_heads_3_cast_fp16, y = var_490_cast_fp16)[name = string("attn_1_cast_fp16")]; + tensor var_495 = const()[name = string("op_495"), val = tensor([1, -1, 1, 1])]; + tensor input_1_cast_fp16 = reshape(shape = var_495, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; + string obj_11_pad_type_0 = const()[name = string("obj_11_pad_type_0"), val = string("valid")]; + tensor obj_11_strides_0 = const()[name = string("obj_11_strides_0"), val = tensor([1, 1])]; + tensor obj_11_pad_0 = const()[name = string("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_11_dilations_0 = const()[name = string("obj_11_dilations_0"), val = tensor([1, 1])]; + int32 obj_11_groups_0 = const()[name = string("obj_11_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4272512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6369728))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_11_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = input_embeds, y = obj_11_cast_fp16)[name = string("inputs_5_cast_fp16")]; + tensor inputs_sq_7_cast_fp16 = mul(x = inputs_5_cast_fp16, y = inputs_5_cast_fp16)[name = string("inputs_sq_7_cast_fp16")]; + tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([1])]; + bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = inputs_sq_7_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_513_to_fp16 = const()[name = string("op_513_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_514_cast_fp16 = add(x = variance_7_cast_fp16, y = var_513_to_fp16)[name = string("op_514_cast_fp16")]; + fp32 var_515_epsilon_0 = const()[name = string("op_515_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_515_cast_fp16 = rsqrt(epsilon = var_515_epsilon_0, x = var_514_cast_fp16)[name = string("op_515_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = mul(x = inputs_5_cast_fp16, y = var_515_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor w_7_to_fp16 = const()[name = string("w_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6370304)))]; + tensor input_3_cast_fp16 = mul(x = w_7_to_fp16, y = hidden_states_7_cast_fp16)[name = string("input_3_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6372416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9518208))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor var_529_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_529_cast_fp16")]; + string var_535_pad_type_0 = const()[name = string("op_535_pad_type_0"), val = string("valid")]; + tensor var_535_strides_0 = const()[name = string("op_535_strides_0"), val = tensor([1, 1])]; + tensor var_535_pad_0 = const()[name = string("op_535_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_535_dilations_0 = const()[name = string("op_535_dilations_0"), val = tensor([1, 1])]; + int32 var_535_groups_0 = const()[name = string("op_535_groups_0"), val = int32(1)]; + tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9518784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12664576))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_535_cast_fp16 = conv(dilations = var_535_dilations_0, groups = var_535_groups_0, pad = var_535_pad_0, pad_type = var_535_pad_type_0, strides = var_535_strides_0, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_535_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_529_cast_fp16, y = var_535_cast_fp16)[name = string("input_7_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12665152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810944))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_7_cast_fp16")]; + int32 var_549 = const()[name = string("op_549"), val = int32(3)]; + int32 var_559 = const()[name = string("op_559"), val = int32(-2)]; + int32 var_567 = const()[name = string("op_567"), val = int32(1)]; + tensor inputs_sq_9_cast_fp16 = mul(x = inputs_7_cast_fp16, y = inputs_7_cast_fp16)[name = string("inputs_sq_9_cast_fp16")]; + tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([1])]; + bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = inputs_sq_9_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_580_cast_fp16 = add(x = variance_9_cast_fp16, y = var_579_to_fp16)[name = string("op_580_cast_fp16")]; + fp32 var_581_epsilon_0 = const()[name = string("op_581_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_581_cast_fp16 = rsqrt(epsilon = var_581_epsilon_0, x = var_580_cast_fp16)[name = string("op_581_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = mul(x = inputs_7_cast_fp16, y = var_581_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor w_9_to_fp16 = const()[name = string("w_9_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15811520)))]; + tensor obj_13_cast_fp16 = mul(x = w_9_to_fp16, y = hidden_states_11_cast_fp16)[name = string("obj_13_cast_fp16")]; + string query_7_pad_type_0 = const()[name = string("query_7_pad_type_0"), val = string("valid")]; + tensor query_7_strides_0 = const()[name = string("query_7_strides_0"), val = tensor([1, 1])]; + tensor query_7_pad_0 = const()[name = string("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_7_dilations_0 = const()[name = string("query_7_dilations_0"), val = tensor([1, 1])]; + int32 query_7_groups_0 = const()[name = string("query_7_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17910848))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_7_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("query_7_cast_fp16")]; + string current_key_5_pad_type_0 = const()[name = string("current_key_5_pad_type_0"), val = string("valid")]; + tensor current_key_5_strides_0 = const()[name = string("current_key_5_strides_0"), val = tensor([1, 1])]; + tensor current_key_5_pad_0 = const()[name = string("current_key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_5_dilations_0 = const()[name = string("current_key_5_dilations_0"), val = tensor([1, 1])]; + int32 current_key_5_groups_0 = const()[name = string("current_key_5_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17911424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18960064))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_5_cast_fp16 = conv(dilations = current_key_5_dilations_0, groups = current_key_5_groups_0, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = current_key_5_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("current_key_5_cast_fp16")]; + string current_value_3_pad_type_0 = const()[name = string("current_value_3_pad_type_0"), val = string("valid")]; + tensor current_value_3_strides_0 = const()[name = string("current_value_3_strides_0"), val = tensor([1, 1])]; + tensor current_value_3_pad_0 = const()[name = string("current_value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_3_dilations_0 = const()[name = string("current_value_3_dilations_0"), val = tensor([1, 1])]; + int32 current_value_3_groups_0 = const()[name = string("current_value_3_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18960640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20009280))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_3_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_3_dilations_0, groups = current_value_3_groups_0, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = current_value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("current_value_3_cast_fp16")]; + tensor var_618 = const()[name = string("op_618"), val = tensor([16, 128, 1, 1])]; + tensor inputs_9_cast_fp16 = reshape(shape = var_618, x = query_7_cast_fp16)[name = string("inputs_9_cast_fp16")]; + tensor inputs_sq_11_cast_fp16 = mul(x = inputs_9_cast_fp16, y = inputs_9_cast_fp16)[name = string("inputs_sq_11_cast_fp16")]; + tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([1])]; + bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = inputs_sq_11_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_624_to_fp16 = const()[name = string("op_624_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_625_cast_fp16 = add(x = variance_11_cast_fp16, y = var_624_to_fp16)[name = string("op_625_cast_fp16")]; + fp32 var_626_epsilon_0 = const()[name = string("op_626_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_626_cast_fp16 = rsqrt(epsilon = var_626_epsilon_0, x = var_625_cast_fp16)[name = string("op_626_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = mul(x = inputs_9_cast_fp16, y = var_626_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor w_11_to_fp16 = const()[name = string("w_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20009856)))]; + tensor query_normed_3_cast_fp16 = mul(x = w_11_to_fp16, y = hidden_states_13_cast_fp16)[name = string("query_normed_3_cast_fp16")]; + tensor var_634 = const()[name = string("op_634"), val = tensor([8, 128, 1, 1])]; + tensor inputs_11_cast_fp16 = reshape(shape = var_634, x = current_key_5_cast_fp16)[name = string("inputs_11_cast_fp16")]; + tensor inputs_sq_13_cast_fp16 = mul(x = inputs_11_cast_fp16, y = inputs_11_cast_fp16)[name = string("inputs_sq_13_cast_fp16")]; + tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([1])]; + bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = inputs_sq_13_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_641_cast_fp16 = add(x = variance_13_cast_fp16, y = var_640_to_fp16)[name = string("op_641_cast_fp16")]; + fp32 var_642_epsilon_0 = const()[name = string("op_642_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_642_cast_fp16 = rsqrt(epsilon = var_642_epsilon_0, x = var_641_cast_fp16)[name = string("op_642_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = mul(x = inputs_11_cast_fp16, y = var_642_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor w_13_to_fp16 = const()[name = string("w_13_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20010176)))]; + tensor current_key_normed_3_cast_fp16 = mul(x = w_13_to_fp16, y = hidden_states_15_cast_fp16)[name = string("current_key_normed_3_cast_fp16")]; + tensor var_660 = const()[name = string("op_660"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_660, x = query_normed_3_cast_fp16)[name = string("mh_q_7_cast_fp16")]; + tensor var_662 = const()[name = string("op_662"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_5_cast_fp16 = reshape(shape = var_662, x = current_key_normed_3_cast_fp16)[name = string("mh_k_5_cast_fp16")]; + tensor var_666_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_666_cast_fp16")]; + tensor var_671_begin_0 = const()[name = string("op_671_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_671_end_0 = const()[name = string("op_671_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_671_end_mask_0 = const()[name = string("op_671_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, x = mh_q_7_cast_fp16)[name = string("op_671_cast_fp16")]; + tensor var_677_begin_0 = const()[name = string("op_677_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_677_end_0 = const()[name = string("op_677_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_677_end_mask_0 = const()[name = string("op_677_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_677_cast_fp16 = slice_by_index(begin = var_677_begin_0, end = var_677_end_0, end_mask = var_677_end_mask_0, x = mh_q_7_cast_fp16)[name = string("op_677_cast_fp16")]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_679_cast_fp16 = mul(x = var_677_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_679_cast_fp16")]; + bool var_681_interleave_0 = const()[name = string("op_681_interleave_0"), val = bool(false)]; + tensor var_681_cast_fp16 = concat(axis = var_559, interleave = var_681_interleave_0, values = (var_679_cast_fp16, var_671_cast_fp16))[name = string("op_681_cast_fp16")]; + tensor var_682_cast_fp16 = mul(x = var_681_cast_fp16, y = sin_1_cast_fp16)[name = string("op_682_cast_fp16")]; + tensor mh_q_9_cast_fp16 = add(x = var_666_cast_fp16, y = var_682_cast_fp16)[name = string("mh_q_9_cast_fp16")]; + tensor var_684_cast_fp16 = mul(x = mh_k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_684_cast_fp16")]; + tensor var_689_begin_0 = const()[name = string("op_689_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_689_end_0 = const()[name = string("op_689_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_689_end_mask_0 = const()[name = string("op_689_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = var_689_end_0, end_mask = var_689_end_mask_0, x = mh_k_5_cast_fp16)[name = string("op_689_cast_fp16")]; + tensor var_695_begin_0 = const()[name = string("op_695_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_695_end_0 = const()[name = string("op_695_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_695_end_mask_0 = const()[name = string("op_695_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_695_cast_fp16 = slice_by_index(begin = var_695_begin_0, end = var_695_end_0, end_mask = var_695_end_mask_0, x = mh_k_5_cast_fp16)[name = string("op_695_cast_fp16")]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_697_cast_fp16 = mul(x = var_695_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_697_cast_fp16")]; + bool var_699_interleave_0 = const()[name = string("op_699_interleave_0"), val = bool(false)]; + tensor var_699_cast_fp16 = concat(axis = var_559, interleave = var_699_interleave_0, values = (var_697_cast_fp16, var_689_cast_fp16))[name = string("op_699_cast_fp16")]; + tensor var_700_cast_fp16 = mul(x = var_699_cast_fp16, y = sin_1_cast_fp16)[name = string("op_700_cast_fp16")]; + tensor mh_k_7_cast_fp16 = add(x = var_684_cast_fp16, y = var_700_cast_fp16)[name = string("mh_k_7_cast_fp16")]; + tensor var_704 = const()[name = string("op_704"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_7_cast_fp16 = reshape(shape = var_704, x = mh_k_7_cast_fp16)[name = string("current_key_7_cast_fp16")]; + tensor var_711_cast_fp16 = mul(x = var_101_cast_fp16_1, y = var_323_cast_fp16)[name = string("op_711_cast_fp16")]; + tensor var_712_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_321_cast_fp16)[name = string("op_712_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_711_cast_fp16, y = var_712_cast_fp16)[name = string("key_9_cast_fp16")]; + tensor var_715_cast_fp16 = mul(x = var_132_cast_fp16_1, y = var_323_cast_fp16)[name = string("op_715_cast_fp16")]; + tensor var_716_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_321_cast_fp16)[name = string("op_716_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_715_cast_fp16, y = var_716_cast_fp16)[name = string("value_5_cast_fp16")]; + tensor var_720 = const()[name = string("op_720"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_5_cast_fp16 = reshape(shape = var_720, x = key_9_cast_fp16)[name = string("key_heads_5_cast_fp16")]; + tensor var_722 = const()[name = string("op_722"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_5_cast_fp16 = reshape(shape = var_722, x = value_5_cast_fp16)[name = string("value_heads_5_cast_fp16")]; + tensor var_725_begin_0 = const()[name = string("op_725_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_725_end_0 = const()[name = string("op_725_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_725_end_mask_0 = const()[name = string("op_725_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_725_cast_fp16 = slice_by_index(begin = var_725_begin_0, end = var_725_end_0, end_mask = var_725_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_725_cast_fp16")]; + tensor var_729_begin_0 = const()[name = string("op_729_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_729_end_0 = const()[name = string("op_729_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_729_end_mask_0 = const()[name = string("op_729_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_729_cast_fp16 = slice_by_index(begin = var_729_begin_0, end = var_729_end_0, end_mask = var_729_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_729_cast_fp16")]; + tensor var_741_begin_0 = const()[name = string("op_741_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_741_end_0 = const()[name = string("op_741_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_741_end_mask_0 = const()[name = string("op_741_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_741_cast_fp16 = slice_by_index(begin = var_741_begin_0, end = var_741_end_0, end_mask = var_741_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_741_cast_fp16")]; + tensor var_745_begin_0 = const()[name = string("op_745_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_745_end_0 = const()[name = string("op_745_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_745_end_mask_0 = const()[name = string("op_745_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_745_cast_fp16 = slice_by_index(begin = var_745_begin_0, end = var_745_end_0, end_mask = var_745_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_745_cast_fp16")]; + tensor var_757_begin_0 = const()[name = string("op_757_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_757_end_0 = const()[name = string("op_757_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_757_end_mask_0 = const()[name = string("op_757_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_757_cast_fp16 = slice_by_index(begin = var_757_begin_0, end = var_757_end_0, end_mask = var_757_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_757_cast_fp16")]; + tensor var_761_begin_0 = const()[name = string("op_761_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_761_end_0 = const()[name = string("op_761_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_761_end_mask_0 = const()[name = string("op_761_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_761_cast_fp16 = slice_by_index(begin = var_761_begin_0, end = var_761_end_0, end_mask = var_761_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_761_cast_fp16")]; + tensor var_773_begin_0 = const()[name = string("op_773_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_773_end_0 = const()[name = string("op_773_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_773_end_mask_0 = const()[name = string("op_773_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_773_cast_fp16 = slice_by_index(begin = var_773_begin_0, end = var_773_end_0, end_mask = var_773_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_773_cast_fp16")]; + tensor var_777_begin_0 = const()[name = string("op_777_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_777_end_0 = const()[name = string("op_777_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_777_end_mask_0 = const()[name = string("op_777_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_777_cast_fp16 = slice_by_index(begin = var_777_begin_0, end = var_777_end_0, end_mask = var_777_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_777_cast_fp16")]; + tensor var_789_begin_0 = const()[name = string("op_789_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_789_end_0 = const()[name = string("op_789_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_789_end_mask_0 = const()[name = string("op_789_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_789_cast_fp16 = slice_by_index(begin = var_789_begin_0, end = var_789_end_0, end_mask = var_789_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_789_cast_fp16")]; + tensor var_793_begin_0 = const()[name = string("op_793_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_793_end_0 = const()[name = string("op_793_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_793_end_mask_0 = const()[name = string("op_793_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_793_cast_fp16 = slice_by_index(begin = var_793_begin_0, end = var_793_end_0, end_mask = var_793_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_793_cast_fp16")]; + tensor var_805_begin_0 = const()[name = string("op_805_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_805_end_0 = const()[name = string("op_805_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_805_end_mask_0 = const()[name = string("op_805_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_805_cast_fp16 = slice_by_index(begin = var_805_begin_0, end = var_805_end_0, end_mask = var_805_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_805_cast_fp16")]; + tensor var_809_begin_0 = const()[name = string("op_809_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_809_end_0 = const()[name = string("op_809_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_809_end_mask_0 = const()[name = string("op_809_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_809_cast_fp16 = slice_by_index(begin = var_809_begin_0, end = var_809_end_0, end_mask = var_809_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_809_cast_fp16")]; + tensor var_821_begin_0 = const()[name = string("op_821_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_821_end_0 = const()[name = string("op_821_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_821_end_mask_0 = const()[name = string("op_821_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_821_cast_fp16 = slice_by_index(begin = var_821_begin_0, end = var_821_end_0, end_mask = var_821_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_821_cast_fp16")]; + tensor var_825_begin_0 = const()[name = string("op_825_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_825_end_0 = const()[name = string("op_825_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_825_end_mask_0 = const()[name = string("op_825_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_825_cast_fp16 = slice_by_index(begin = var_825_begin_0, end = var_825_end_0, end_mask = var_825_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_825_cast_fp16")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_837_end_0 = const()[name = string("op_837_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_837_cast_fp16")]; + tensor var_841_begin_0 = const()[name = string("op_841_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_841_end_0 = const()[name = string("op_841_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_841_end_mask_0 = const()[name = string("op_841_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_841_cast_fp16 = slice_by_index(begin = var_841_begin_0, end = var_841_end_0, end_mask = var_841_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_841_cast_fp16")]; + bool key_heads_7_interleave_0 = const()[name = string("key_heads_7_interleave_0"), val = bool(false)]; + tensor key_heads_7_cast_fp16 = concat(axis = var_567, interleave = key_heads_7_interleave_0, values = (var_725_cast_fp16, var_725_cast_fp16, var_741_cast_fp16, var_741_cast_fp16, var_757_cast_fp16, var_757_cast_fp16, var_773_cast_fp16, var_773_cast_fp16, var_789_cast_fp16, var_789_cast_fp16, var_805_cast_fp16, var_805_cast_fp16, var_821_cast_fp16, var_821_cast_fp16, var_837_cast_fp16, var_837_cast_fp16))[name = string("key_heads_7_cast_fp16")]; + bool value_heads_7_interleave_0 = const()[name = string("value_heads_7_interleave_0"), val = bool(false)]; + tensor value_heads_7_cast_fp16 = concat(axis = var_567, interleave = value_heads_7_interleave_0, values = (var_729_cast_fp16, var_729_cast_fp16, var_745_cast_fp16, var_745_cast_fp16, var_761_cast_fp16, var_761_cast_fp16, var_777_cast_fp16, var_777_cast_fp16, var_793_cast_fp16, var_793_cast_fp16, var_809_cast_fp16, var_809_cast_fp16, var_825_cast_fp16, var_825_cast_fp16, var_841_cast_fp16, var_841_cast_fp16))[name = string("value_heads_7_cast_fp16")]; + fp16 var_864_to_fp16 = const()[name = string("op_864_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_865_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_864_to_fp16)[name = string("op_865_cast_fp16")]; + bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; + bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_865_cast_fp16, y = key_heads_7_cast_fp16)[name = string("mh_w_5_cast_fp16")]; + tensor mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_7_cast_fp16")]; + tensor var_877_cast_fp16 = softmax(axis = var_549, x = mh_w_7_cast_fp16)[name = string("op_877_cast_fp16")]; + bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; + bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = value_heads_7_cast_fp16, y = var_877_cast_fp16)[name = string("attn_3_cast_fp16")]; + tensor var_882 = const()[name = string("op_882"), val = tensor([1, -1, 1, 1])]; + tensor input_9_cast_fp16 = reshape(shape = var_882, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")]; + string obj_19_pad_type_0 = const()[name = string("obj_19_pad_type_0"), val = string("valid")]; + tensor obj_19_strides_0 = const()[name = string("obj_19_strides_0"), val = tensor([1, 1])]; + tensor obj_19_pad_0 = const()[name = string("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_19_dilations_0 = const()[name = string("obj_19_dilations_0"), val = tensor([1, 1])]; + int32 obj_19_groups_0 = const()[name = string("obj_19_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20010496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22107712))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_19_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("obj_19_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_13_cast_fp16")]; + tensor inputs_sq_15_cast_fp16 = mul(x = inputs_13_cast_fp16, y = inputs_13_cast_fp16)[name = string("inputs_sq_15_cast_fp16")]; + tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([1])]; + bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = inputs_sq_15_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_900_to_fp16 = const()[name = string("op_900_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_901_cast_fp16 = add(x = variance_15_cast_fp16, y = var_900_to_fp16)[name = string("op_901_cast_fp16")]; + fp32 var_902_epsilon_0 = const()[name = string("op_902_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_902_cast_fp16 = rsqrt(epsilon = var_902_epsilon_0, x = var_901_cast_fp16)[name = string("op_902_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = mul(x = inputs_13_cast_fp16, y = var_902_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor w_15_to_fp16 = const()[name = string("w_15_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22108288)))]; + tensor input_11_cast_fp16 = mul(x = w_15_to_fp16, y = hidden_states_17_cast_fp16)[name = string("input_11_cast_fp16")]; + string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; + tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; + tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; + int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; + tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22110400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25256192))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")]; + tensor var_916_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_916_cast_fp16")]; + string var_922_pad_type_0 = const()[name = string("op_922_pad_type_0"), val = string("valid")]; + tensor var_922_strides_0 = const()[name = string("op_922_strides_0"), val = tensor([1, 1])]; + tensor var_922_pad_0 = const()[name = string("op_922_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_922_dilations_0 = const()[name = string("op_922_dilations_0"), val = tensor([1, 1])]; + int32 var_922_groups_0 = const()[name = string("op_922_groups_0"), val = int32(1)]; + tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25256768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28402560))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_922_cast_fp16 = conv(dilations = var_922_dilations_0, groups = var_922_groups_0, pad = var_922_pad_0, pad_type = var_922_pad_type_0, strides = var_922_strides_0, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_922_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_916_cast_fp16, y = var_922_cast_fp16)[name = string("input_15_cast_fp16")]; + string hidden_states_19_pad_type_0 = const()[name = string("hidden_states_19_pad_type_0"), val = string("valid")]; + tensor hidden_states_19_strides_0 = const()[name = string("hidden_states_19_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_0 = const()[name = string("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_19_dilations_0 = const()[name = string("hidden_states_19_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_19_groups_0 = const()[name = string("hidden_states_19_groups_0"), val = int32(1)]; + tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28403136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31548928))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_19_cast_fp16 = conv(dilations = hidden_states_19_dilations_0, groups = hidden_states_19_groups_0, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = hidden_states_19_strides_0, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_15_cast_fp16")]; + int32 var_936 = const()[name = string("op_936"), val = int32(3)]; + int32 var_946 = const()[name = string("op_946"), val = int32(-2)]; + int32 var_954 = const()[name = string("op_954"), val = int32(1)]; + tensor inputs_sq_17_cast_fp16 = mul(x = inputs_15_cast_fp16, y = inputs_15_cast_fp16)[name = string("inputs_sq_17_cast_fp16")]; + tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([1])]; + bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = inputs_sq_17_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_966_to_fp16 = const()[name = string("op_966_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_967_cast_fp16 = add(x = variance_17_cast_fp16, y = var_966_to_fp16)[name = string("op_967_cast_fp16")]; + fp32 var_968_epsilon_0 = const()[name = string("op_968_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_968_cast_fp16 = rsqrt(epsilon = var_968_epsilon_0, x = var_967_cast_fp16)[name = string("op_968_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = mul(x = inputs_15_cast_fp16, y = var_968_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor w_17_to_fp16 = const()[name = string("w_17_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31549504)))]; + tensor obj_21_cast_fp16 = mul(x = w_17_to_fp16, y = hidden_states_21_cast_fp16)[name = string("obj_21_cast_fp16")]; + string query_13_pad_type_0 = const()[name = string("query_13_pad_type_0"), val = string("valid")]; + tensor query_13_strides_0 = const()[name = string("query_13_strides_0"), val = tensor([1, 1])]; + tensor query_13_pad_0 = const()[name = string("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_13_dilations_0 = const()[name = string("query_13_dilations_0"), val = tensor([1, 1])]; + int32 query_13_groups_0 = const()[name = string("query_13_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31551616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33648832))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_13_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("query_13_cast_fp16")]; + string current_key_9_pad_type_0 = const()[name = string("current_key_9_pad_type_0"), val = string("valid")]; + tensor current_key_9_strides_0 = const()[name = string("current_key_9_strides_0"), val = tensor([1, 1])]; + tensor current_key_9_pad_0 = const()[name = string("current_key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_9_dilations_0 = const()[name = string("current_key_9_dilations_0"), val = tensor([1, 1])]; + int32 current_key_9_groups_0 = const()[name = string("current_key_9_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33649408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34698048))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_9_cast_fp16 = conv(dilations = current_key_9_dilations_0, groups = current_key_9_groups_0, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = current_key_9_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("current_key_9_cast_fp16")]; + string current_value_5_pad_type_0 = const()[name = string("current_value_5_pad_type_0"), val = string("valid")]; + tensor current_value_5_strides_0 = const()[name = string("current_value_5_strides_0"), val = tensor([1, 1])]; + tensor current_value_5_pad_0 = const()[name = string("current_value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_5_dilations_0 = const()[name = string("current_value_5_dilations_0"), val = tensor([1, 1])]; + int32 current_value_5_groups_0 = const()[name = string("current_value_5_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34698624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35747264))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_5_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_5_dilations_0, groups = current_value_5_groups_0, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = current_value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("current_value_5_cast_fp16")]; + tensor var_1005 = const()[name = string("op_1005"), val = tensor([16, 128, 1, 1])]; + tensor inputs_17_cast_fp16 = reshape(shape = var_1005, x = query_13_cast_fp16)[name = string("inputs_17_cast_fp16")]; + tensor inputs_sq_19_cast_fp16 = mul(x = inputs_17_cast_fp16, y = inputs_17_cast_fp16)[name = string("inputs_sq_19_cast_fp16")]; + tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([1])]; + bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = inputs_sq_19_cast_fp16)[name = string("variance_19_cast_fp16")]; + fp16 var_1011_to_fp16 = const()[name = string("op_1011_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1012_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1011_to_fp16)[name = string("op_1012_cast_fp16")]; + fp32 var_1013_epsilon_0 = const()[name = string("op_1013_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1013_cast_fp16 = rsqrt(epsilon = var_1013_epsilon_0, x = var_1012_cast_fp16)[name = string("op_1013_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = inputs_17_cast_fp16, y = var_1013_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor w_19_to_fp16 = const()[name = string("w_19_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35747840)))]; + tensor query_normed_5_cast_fp16 = mul(x = w_19_to_fp16, y = hidden_states_23_cast_fp16)[name = string("query_normed_5_cast_fp16")]; + tensor var_1021 = const()[name = string("op_1021"), val = tensor([8, 128, 1, 1])]; + tensor inputs_19_cast_fp16 = reshape(shape = var_1021, x = current_key_9_cast_fp16)[name = string("inputs_19_cast_fp16")]; + tensor inputs_sq_21_cast_fp16 = mul(x = inputs_19_cast_fp16, y = inputs_19_cast_fp16)[name = string("inputs_sq_21_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = inputs_sq_21_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_1027_to_fp16 = const()[name = string("op_1027_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1028_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1027_to_fp16)[name = string("op_1028_cast_fp16")]; + fp32 var_1029_epsilon_0 = const()[name = string("op_1029_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1029_cast_fp16 = rsqrt(epsilon = var_1029_epsilon_0, x = var_1028_cast_fp16)[name = string("op_1029_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = mul(x = inputs_19_cast_fp16, y = var_1029_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; + tensor w_21_to_fp16 = const()[name = string("w_21_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35748160)))]; + tensor current_key_normed_5_cast_fp16 = mul(x = w_21_to_fp16, y = hidden_states_25_cast_fp16)[name = string("current_key_normed_5_cast_fp16")]; + tensor var_1047 = const()[name = string("op_1047"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1047, x = query_normed_5_cast_fp16)[name = string("mh_q_13_cast_fp16")]; + tensor var_1049 = const()[name = string("op_1049"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_9_cast_fp16 = reshape(shape = var_1049, x = current_key_normed_5_cast_fp16)[name = string("mh_k_9_cast_fp16")]; + tensor var_1053_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1053_cast_fp16")]; + tensor var_1058_begin_0 = const()[name = string("op_1058_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1058_end_0 = const()[name = string("op_1058_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1058_end_mask_0 = const()[name = string("op_1058_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1058_cast_fp16 = slice_by_index(begin = var_1058_begin_0, end = var_1058_end_0, end_mask = var_1058_end_mask_0, x = mh_q_13_cast_fp16)[name = string("op_1058_cast_fp16")]; + tensor var_1064_begin_0 = const()[name = string("op_1064_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1064_end_0 = const()[name = string("op_1064_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1064_end_mask_0 = const()[name = string("op_1064_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1064_cast_fp16 = slice_by_index(begin = var_1064_begin_0, end = var_1064_end_0, end_mask = var_1064_end_mask_0, x = mh_q_13_cast_fp16)[name = string("op_1064_cast_fp16")]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1066_cast_fp16 = mul(x = var_1064_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1066_cast_fp16")]; + bool var_1068_interleave_0 = const()[name = string("op_1068_interleave_0"), val = bool(false)]; + tensor var_1068_cast_fp16 = concat(axis = var_946, interleave = var_1068_interleave_0, values = (var_1066_cast_fp16, var_1058_cast_fp16))[name = string("op_1068_cast_fp16")]; + tensor var_1069_cast_fp16 = mul(x = var_1068_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1069_cast_fp16")]; + tensor mh_q_15_cast_fp16 = add(x = var_1053_cast_fp16, y = var_1069_cast_fp16)[name = string("mh_q_15_cast_fp16")]; + tensor var_1071_cast_fp16 = mul(x = mh_k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1071_cast_fp16")]; + tensor var_1076_begin_0 = const()[name = string("op_1076_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1076_end_0 = const()[name = string("op_1076_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1076_end_mask_0 = const()[name = string("op_1076_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1076_cast_fp16 = slice_by_index(begin = var_1076_begin_0, end = var_1076_end_0, end_mask = var_1076_end_mask_0, x = mh_k_9_cast_fp16)[name = string("op_1076_cast_fp16")]; + tensor var_1082_begin_0 = const()[name = string("op_1082_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1082_end_0 = const()[name = string("op_1082_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1082_end_mask_0 = const()[name = string("op_1082_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1082_cast_fp16 = slice_by_index(begin = var_1082_begin_0, end = var_1082_end_0, end_mask = var_1082_end_mask_0, x = mh_k_9_cast_fp16)[name = string("op_1082_cast_fp16")]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1084_cast_fp16 = mul(x = var_1082_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_1084_cast_fp16")]; + bool var_1086_interleave_0 = const()[name = string("op_1086_interleave_0"), val = bool(false)]; + tensor var_1086_cast_fp16 = concat(axis = var_946, interleave = var_1086_interleave_0, values = (var_1084_cast_fp16, var_1076_cast_fp16))[name = string("op_1086_cast_fp16")]; + tensor var_1087_cast_fp16 = mul(x = var_1086_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1087_cast_fp16")]; + tensor mh_k_11_cast_fp16 = add(x = var_1071_cast_fp16, y = var_1087_cast_fp16)[name = string("mh_k_11_cast_fp16")]; + tensor var_1091 = const()[name = string("op_1091"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_11_cast_fp16 = reshape(shape = var_1091, x = mh_k_11_cast_fp16)[name = string("current_key_11_cast_fp16")]; + tensor var_1098_cast_fp16 = mul(x = var_101_cast_fp16_2, y = var_323_cast_fp16)[name = string("op_1098_cast_fp16")]; + tensor var_1099_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_321_cast_fp16)[name = string("op_1099_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1098_cast_fp16, y = var_1099_cast_fp16)[name = string("key_15_cast_fp16")]; + tensor var_1102_cast_fp16 = mul(x = var_132_cast_fp16_2, y = var_323_cast_fp16)[name = string("op_1102_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_321_cast_fp16)[name = string("op_1103_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1102_cast_fp16, y = var_1103_cast_fp16)[name = string("value_9_cast_fp16")]; + tensor var_1107 = const()[name = string("op_1107"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_9_cast_fp16 = reshape(shape = var_1107, x = key_15_cast_fp16)[name = string("key_heads_9_cast_fp16")]; + tensor var_1109 = const()[name = string("op_1109"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_9_cast_fp16 = reshape(shape = var_1109, x = value_9_cast_fp16)[name = string("value_heads_9_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = string("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = string("op_1112_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1112_end_mask_0 = const()[name = string("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1112_cast_fp16")]; + tensor var_1116_begin_0 = const()[name = string("op_1116_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1116_end_0 = const()[name = string("op_1116_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1116_end_mask_0 = const()[name = string("op_1116_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1116_cast_fp16 = slice_by_index(begin = var_1116_begin_0, end = var_1116_end_0, end_mask = var_1116_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1116_cast_fp16")]; + tensor var_1128_begin_0 = const()[name = string("op_1128_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1128_end_0 = const()[name = string("op_1128_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_1128_end_mask_0 = const()[name = string("op_1128_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1128_cast_fp16 = slice_by_index(begin = var_1128_begin_0, end = var_1128_end_0, end_mask = var_1128_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1128_cast_fp16")]; + tensor var_1132_begin_0 = const()[name = string("op_1132_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1132_end_0 = const()[name = string("op_1132_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_1132_end_mask_0 = const()[name = string("op_1132_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1132_cast_fp16 = slice_by_index(begin = var_1132_begin_0, end = var_1132_end_0, end_mask = var_1132_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1132_cast_fp16")]; + tensor var_1144_begin_0 = const()[name = string("op_1144_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1144_end_0 = const()[name = string("op_1144_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_1144_end_mask_0 = const()[name = string("op_1144_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1144_cast_fp16 = slice_by_index(begin = var_1144_begin_0, end = var_1144_end_0, end_mask = var_1144_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1144_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = string("op_1148_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1148_end_0 = const()[name = string("op_1148_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_1148_end_mask_0 = const()[name = string("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = string("op_1160_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1160_end_0 = const()[name = string("op_1160_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_1160_end_mask_0 = const()[name = string("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1160_cast_fp16")]; + tensor var_1164_begin_0 = const()[name = string("op_1164_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1164_end_0 = const()[name = string("op_1164_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_1164_end_mask_0 = const()[name = string("op_1164_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1164_cast_fp16 = slice_by_index(begin = var_1164_begin_0, end = var_1164_end_0, end_mask = var_1164_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1164_cast_fp16")]; + tensor var_1176_begin_0 = const()[name = string("op_1176_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1176_end_0 = const()[name = string("op_1176_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_1176_end_mask_0 = const()[name = string("op_1176_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1176_cast_fp16 = slice_by_index(begin = var_1176_begin_0, end = var_1176_end_0, end_mask = var_1176_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1176_cast_fp16")]; + tensor var_1180_begin_0 = const()[name = string("op_1180_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1180_end_0 = const()[name = string("op_1180_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_1180_end_mask_0 = const()[name = string("op_1180_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1180_cast_fp16 = slice_by_index(begin = var_1180_begin_0, end = var_1180_end_0, end_mask = var_1180_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1180_cast_fp16")]; + tensor var_1192_begin_0 = const()[name = string("op_1192_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1192_end_0 = const()[name = string("op_1192_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_1192_end_mask_0 = const()[name = string("op_1192_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1192_cast_fp16 = slice_by_index(begin = var_1192_begin_0, end = var_1192_end_0, end_mask = var_1192_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1192_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = string("op_1196_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1196_end_0 = const()[name = string("op_1196_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_1196_end_mask_0 = const()[name = string("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1196_cast_fp16")]; + tensor var_1208_begin_0 = const()[name = string("op_1208_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1208_end_0 = const()[name = string("op_1208_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_1208_end_mask_0 = const()[name = string("op_1208_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1208_cast_fp16 = slice_by_index(begin = var_1208_begin_0, end = var_1208_end_0, end_mask = var_1208_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1208_cast_fp16")]; + tensor var_1212_begin_0 = const()[name = string("op_1212_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1212_end_0 = const()[name = string("op_1212_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_1212_end_mask_0 = const()[name = string("op_1212_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1212_cast_fp16 = slice_by_index(begin = var_1212_begin_0, end = var_1212_end_0, end_mask = var_1212_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1212_cast_fp16")]; + tensor var_1224_begin_0 = const()[name = string("op_1224_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1224_end_0 = const()[name = string("op_1224_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1224_end_mask_0 = const()[name = string("op_1224_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1224_cast_fp16 = slice_by_index(begin = var_1224_begin_0, end = var_1224_end_0, end_mask = var_1224_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1224_cast_fp16")]; + tensor var_1228_begin_0 = const()[name = string("op_1228_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1228_end_0 = const()[name = string("op_1228_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1228_end_mask_0 = const()[name = string("op_1228_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1228_cast_fp16 = slice_by_index(begin = var_1228_begin_0, end = var_1228_end_0, end_mask = var_1228_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1228_cast_fp16")]; + bool key_heads_11_interleave_0 = const()[name = string("key_heads_11_interleave_0"), val = bool(false)]; + tensor key_heads_11_cast_fp16 = concat(axis = var_954, interleave = key_heads_11_interleave_0, values = (var_1112_cast_fp16, var_1112_cast_fp16, var_1128_cast_fp16, var_1128_cast_fp16, var_1144_cast_fp16, var_1144_cast_fp16, var_1160_cast_fp16, var_1160_cast_fp16, var_1176_cast_fp16, var_1176_cast_fp16, var_1192_cast_fp16, var_1192_cast_fp16, var_1208_cast_fp16, var_1208_cast_fp16, var_1224_cast_fp16, var_1224_cast_fp16))[name = string("key_heads_11_cast_fp16")]; + bool value_heads_11_interleave_0 = const()[name = string("value_heads_11_interleave_0"), val = bool(false)]; + tensor value_heads_11_cast_fp16 = concat(axis = var_954, interleave = value_heads_11_interleave_0, values = (var_1116_cast_fp16, var_1116_cast_fp16, var_1132_cast_fp16, var_1132_cast_fp16, var_1148_cast_fp16, var_1148_cast_fp16, var_1164_cast_fp16, var_1164_cast_fp16, var_1180_cast_fp16, var_1180_cast_fp16, var_1196_cast_fp16, var_1196_cast_fp16, var_1212_cast_fp16, var_1212_cast_fp16, var_1228_cast_fp16, var_1228_cast_fp16))[name = string("value_heads_11_cast_fp16")]; + fp16 var_1251_to_fp16 = const()[name = string("op_1251_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1252_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1251_to_fp16)[name = string("op_1252_cast_fp16")]; + bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; + bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1252_cast_fp16, y = key_heads_11_cast_fp16)[name = string("mh_w_9_cast_fp16")]; + tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_11_cast_fp16")]; + tensor var_1264_cast_fp16 = softmax(axis = var_936, x = mh_w_11_cast_fp16)[name = string("op_1264_cast_fp16")]; + bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; + bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = value_heads_11_cast_fp16, y = var_1264_cast_fp16)[name = string("attn_5_cast_fp16")]; + tensor var_1269 = const()[name = string("op_1269"), val = tensor([1, -1, 1, 1])]; + tensor input_17_cast_fp16 = reshape(shape = var_1269, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")]; + string obj_27_pad_type_0 = const()[name = string("obj_27_pad_type_0"), val = string("valid")]; + tensor obj_27_strides_0 = const()[name = string("obj_27_strides_0"), val = tensor([1, 1])]; + tensor obj_27_pad_0 = const()[name = string("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_27_dilations_0 = const()[name = string("obj_27_dilations_0"), val = tensor([1, 1])]; + int32 obj_27_groups_0 = const()[name = string("obj_27_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35748480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37845696))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_27_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_27_dilations_0, groups = obj_27_groups_0, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = obj_27_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("obj_27_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_21_cast_fp16")]; + tensor inputs_sq_23_cast_fp16 = mul(x = inputs_21_cast_fp16, y = inputs_21_cast_fp16)[name = string("inputs_sq_23_cast_fp16")]; + tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([1])]; + bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = inputs_sq_23_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1287_to_fp16 = const()[name = string("op_1287_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1288_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1287_to_fp16)[name = string("op_1288_cast_fp16")]; + fp32 var_1289_epsilon_0 = const()[name = string("op_1289_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1289_cast_fp16 = rsqrt(epsilon = var_1289_epsilon_0, x = var_1288_cast_fp16)[name = string("op_1289_cast_fp16")]; + tensor hidden_states_27_cast_fp16 = mul(x = inputs_21_cast_fp16, y = var_1289_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor w_23_to_fp16 = const()[name = string("w_23_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37846272)))]; + tensor input_19_cast_fp16 = mul(x = w_23_to_fp16, y = hidden_states_27_cast_fp16)[name = string("input_19_cast_fp16")]; + string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; + tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; + tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; + int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; + tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37848384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40994176))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_1303_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_1303_cast_fp16")]; + string var_1309_pad_type_0 = const()[name = string("op_1309_pad_type_0"), val = string("valid")]; + tensor var_1309_strides_0 = const()[name = string("op_1309_strides_0"), val = tensor([1, 1])]; + tensor var_1309_pad_0 = const()[name = string("op_1309_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1309_dilations_0 = const()[name = string("op_1309_dilations_0"), val = tensor([1, 1])]; + int32 var_1309_groups_0 = const()[name = string("op_1309_groups_0"), val = int32(1)]; + tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40994752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44140544))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_1309_cast_fp16 = conv(dilations = var_1309_dilations_0, groups = var_1309_groups_0, pad = var_1309_pad_0, pad_type = var_1309_pad_type_0, strides = var_1309_strides_0, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_1303_cast_fp16, y = var_1309_cast_fp16)[name = string("input_23_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44141120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47286912))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("inputs_23_cast_fp16")]; + int32 var_1323 = const()[name = string("op_1323"), val = int32(3)]; + int32 var_1333 = const()[name = string("op_1333"), val = int32(-2)]; + int32 var_1341 = const()[name = string("op_1341"), val = int32(1)]; + tensor inputs_sq_25_cast_fp16 = mul(x = inputs_23_cast_fp16, y = inputs_23_cast_fp16)[name = string("inputs_sq_25_cast_fp16")]; + tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([1])]; + bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = inputs_sq_25_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1353_to_fp16 = const()[name = string("op_1353_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1354_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1353_to_fp16)[name = string("op_1354_cast_fp16")]; + fp32 var_1355_epsilon_0 = const()[name = string("op_1355_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1355_cast_fp16 = rsqrt(epsilon = var_1355_epsilon_0, x = var_1354_cast_fp16)[name = string("op_1355_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = mul(x = inputs_23_cast_fp16, y = var_1355_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor w_25_to_fp16 = const()[name = string("w_25_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47287488)))]; + tensor obj_29_cast_fp16 = mul(x = w_25_to_fp16, y = hidden_states_31_cast_fp16)[name = string("obj_29_cast_fp16")]; + string query_19_pad_type_0 = const()[name = string("query_19_pad_type_0"), val = string("valid")]; + tensor query_19_strides_0 = const()[name = string("query_19_strides_0"), val = tensor([1, 1])]; + tensor query_19_pad_0 = const()[name = string("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_19_dilations_0 = const()[name = string("query_19_dilations_0"), val = tensor([1, 1])]; + int32 query_19_groups_0 = const()[name = string("query_19_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47289600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49386816))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_19_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_19_dilations_0, groups = query_19_groups_0, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = query_19_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("query_19_cast_fp16")]; + string current_key_13_pad_type_0 = const()[name = string("current_key_13_pad_type_0"), val = string("valid")]; + tensor current_key_13_strides_0 = const()[name = string("current_key_13_strides_0"), val = tensor([1, 1])]; + tensor current_key_13_pad_0 = const()[name = string("current_key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_13_dilations_0 = const()[name = string("current_key_13_dilations_0"), val = tensor([1, 1])]; + int32 current_key_13_groups_0 = const()[name = string("current_key_13_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49387392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50436032))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_13_cast_fp16 = conv(dilations = current_key_13_dilations_0, groups = current_key_13_groups_0, pad = current_key_13_pad_0, pad_type = current_key_13_pad_type_0, strides = current_key_13_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("current_key_13_cast_fp16")]; + string current_value_7_pad_type_0 = const()[name = string("current_value_7_pad_type_0"), val = string("valid")]; + tensor current_value_7_strides_0 = const()[name = string("current_value_7_strides_0"), val = tensor([1, 1])]; + tensor current_value_7_pad_0 = const()[name = string("current_value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_7_dilations_0 = const()[name = string("current_value_7_dilations_0"), val = tensor([1, 1])]; + int32 current_value_7_groups_0 = const()[name = string("current_value_7_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50436608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51485248))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_7_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_7_dilations_0, groups = current_value_7_groups_0, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = current_value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("current_value_7_cast_fp16")]; + tensor var_1392 = const()[name = string("op_1392"), val = tensor([16, 128, 1, 1])]; + tensor inputs_25_cast_fp16 = reshape(shape = var_1392, x = query_19_cast_fp16)[name = string("inputs_25_cast_fp16")]; + tensor inputs_sq_27_cast_fp16 = mul(x = inputs_25_cast_fp16, y = inputs_25_cast_fp16)[name = string("inputs_sq_27_cast_fp16")]; + tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([1])]; + bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = inputs_sq_27_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1398_to_fp16 = const()[name = string("op_1398_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1399_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1398_to_fp16)[name = string("op_1399_cast_fp16")]; + fp32 var_1400_epsilon_0 = const()[name = string("op_1400_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1400_cast_fp16 = rsqrt(epsilon = var_1400_epsilon_0, x = var_1399_cast_fp16)[name = string("op_1400_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = inputs_25_cast_fp16, y = var_1400_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor w_27_to_fp16 = const()[name = string("w_27_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51485824)))]; + tensor query_normed_7_cast_fp16 = mul(x = w_27_to_fp16, y = hidden_states_33_cast_fp16)[name = string("query_normed_7_cast_fp16")]; + tensor var_1408 = const()[name = string("op_1408"), val = tensor([8, 128, 1, 1])]; + tensor inputs_27_cast_fp16 = reshape(shape = var_1408, x = current_key_13_cast_fp16)[name = string("inputs_27_cast_fp16")]; + tensor inputs_sq_29_cast_fp16 = mul(x = inputs_27_cast_fp16, y = inputs_27_cast_fp16)[name = string("inputs_sq_29_cast_fp16")]; + tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([1])]; + bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = inputs_sq_29_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1414_to_fp16 = const()[name = string("op_1414_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1415_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1414_to_fp16)[name = string("op_1415_cast_fp16")]; + fp32 var_1416_epsilon_0 = const()[name = string("op_1416_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1416_cast_fp16 = rsqrt(epsilon = var_1416_epsilon_0, x = var_1415_cast_fp16)[name = string("op_1416_cast_fp16")]; + tensor hidden_states_35_cast_fp16 = mul(x = inputs_27_cast_fp16, y = var_1416_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor w_29_to_fp16 = const()[name = string("w_29_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51486144)))]; + tensor current_key_normed_7_cast_fp16 = mul(x = w_29_to_fp16, y = hidden_states_35_cast_fp16)[name = string("current_key_normed_7_cast_fp16")]; + tensor var_1434 = const()[name = string("op_1434"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1434, x = query_normed_7_cast_fp16)[name = string("mh_q_19_cast_fp16")]; + tensor var_1436 = const()[name = string("op_1436"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_13_cast_fp16 = reshape(shape = var_1436, x = current_key_normed_7_cast_fp16)[name = string("mh_k_13_cast_fp16")]; + tensor var_1440_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1440_cast_fp16")]; + tensor var_1445_begin_0 = const()[name = string("op_1445_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1445_end_0 = const()[name = string("op_1445_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1445_end_mask_0 = const()[name = string("op_1445_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1445_cast_fp16 = slice_by_index(begin = var_1445_begin_0, end = var_1445_end_0, end_mask = var_1445_end_mask_0, x = mh_q_19_cast_fp16)[name = string("op_1445_cast_fp16")]; + tensor var_1451_begin_0 = const()[name = string("op_1451_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1451_end_0 = const()[name = string("op_1451_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1451_end_mask_0 = const()[name = string("op_1451_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1451_cast_fp16 = slice_by_index(begin = var_1451_begin_0, end = var_1451_end_0, end_mask = var_1451_end_mask_0, x = mh_q_19_cast_fp16)[name = string("op_1451_cast_fp16")]; + fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1453_cast_fp16 = mul(x = var_1451_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_1453_cast_fp16")]; + bool var_1455_interleave_0 = const()[name = string("op_1455_interleave_0"), val = bool(false)]; + tensor var_1455_cast_fp16 = concat(axis = var_1333, interleave = var_1455_interleave_0, values = (var_1453_cast_fp16, var_1445_cast_fp16))[name = string("op_1455_cast_fp16")]; + tensor var_1456_cast_fp16 = mul(x = var_1455_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1456_cast_fp16")]; + tensor mh_q_21_cast_fp16 = add(x = var_1440_cast_fp16, y = var_1456_cast_fp16)[name = string("mh_q_21_cast_fp16")]; + tensor var_1458_cast_fp16 = mul(x = mh_k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1458_cast_fp16")]; + tensor var_1463_begin_0 = const()[name = string("op_1463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1463_end_0 = const()[name = string("op_1463_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1463_end_mask_0 = const()[name = string("op_1463_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1463_cast_fp16 = slice_by_index(begin = var_1463_begin_0, end = var_1463_end_0, end_mask = var_1463_end_mask_0, x = mh_k_13_cast_fp16)[name = string("op_1463_cast_fp16")]; + tensor var_1469_begin_0 = const()[name = string("op_1469_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1469_end_0 = const()[name = string("op_1469_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1469_end_mask_0 = const()[name = string("op_1469_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1469_cast_fp16 = slice_by_index(begin = var_1469_begin_0, end = var_1469_end_0, end_mask = var_1469_end_mask_0, x = mh_k_13_cast_fp16)[name = string("op_1469_cast_fp16")]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1471_cast_fp16 = mul(x = var_1469_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_1471_cast_fp16")]; + bool var_1473_interleave_0 = const()[name = string("op_1473_interleave_0"), val = bool(false)]; + tensor var_1473_cast_fp16 = concat(axis = var_1333, interleave = var_1473_interleave_0, values = (var_1471_cast_fp16, var_1463_cast_fp16))[name = string("op_1473_cast_fp16")]; + tensor var_1474_cast_fp16 = mul(x = var_1473_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1474_cast_fp16")]; + tensor mh_k_15_cast_fp16 = add(x = var_1458_cast_fp16, y = var_1474_cast_fp16)[name = string("mh_k_15_cast_fp16")]; + tensor var_1478 = const()[name = string("op_1478"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_15_cast_fp16 = reshape(shape = var_1478, x = mh_k_15_cast_fp16)[name = string("current_key_15_cast_fp16")]; + tensor var_1485_cast_fp16 = mul(x = var_101_cast_fp16_3, y = var_323_cast_fp16)[name = string("op_1485_cast_fp16")]; + tensor var_1486_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_321_cast_fp16)[name = string("op_1486_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1485_cast_fp16, y = var_1486_cast_fp16)[name = string("key_21_cast_fp16")]; + tensor var_1489_cast_fp16 = mul(x = var_132_cast_fp16_3, y = var_323_cast_fp16)[name = string("op_1489_cast_fp16")]; + tensor var_1490_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_321_cast_fp16)[name = string("op_1490_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1489_cast_fp16, y = var_1490_cast_fp16)[name = string("value_13_cast_fp16")]; + tensor var_1494 = const()[name = string("op_1494"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_13_cast_fp16 = reshape(shape = var_1494, x = key_21_cast_fp16)[name = string("key_heads_13_cast_fp16")]; + tensor var_1496 = const()[name = string("op_1496"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_13_cast_fp16 = reshape(shape = var_1496, x = value_13_cast_fp16)[name = string("value_heads_13_cast_fp16")]; + tensor var_1499_begin_0 = const()[name = string("op_1499_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1499_end_0 = const()[name = string("op_1499_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1499_end_mask_0 = const()[name = string("op_1499_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1499_cast_fp16 = slice_by_index(begin = var_1499_begin_0, end = var_1499_end_0, end_mask = var_1499_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1499_cast_fp16")]; + tensor var_1503_begin_0 = const()[name = string("op_1503_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1503_end_0 = const()[name = string("op_1503_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1503_end_mask_0 = const()[name = string("op_1503_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1503_cast_fp16 = slice_by_index(begin = var_1503_begin_0, end = var_1503_end_0, end_mask = var_1503_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1503_cast_fp16")]; + tensor var_1515_begin_0 = const()[name = string("op_1515_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1515_end_0 = const()[name = string("op_1515_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_1515_end_mask_0 = const()[name = string("op_1515_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1515_cast_fp16 = slice_by_index(begin = var_1515_begin_0, end = var_1515_end_0, end_mask = var_1515_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1515_cast_fp16")]; + tensor var_1519_begin_0 = const()[name = string("op_1519_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1519_end_0 = const()[name = string("op_1519_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_1519_end_mask_0 = const()[name = string("op_1519_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1519_cast_fp16 = slice_by_index(begin = var_1519_begin_0, end = var_1519_end_0, end_mask = var_1519_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1519_cast_fp16")]; + tensor var_1531_begin_0 = const()[name = string("op_1531_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1531_end_0 = const()[name = string("op_1531_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_1531_end_mask_0 = const()[name = string("op_1531_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1531_cast_fp16 = slice_by_index(begin = var_1531_begin_0, end = var_1531_end_0, end_mask = var_1531_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1531_cast_fp16")]; + tensor var_1535_begin_0 = const()[name = string("op_1535_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1535_end_0 = const()[name = string("op_1535_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_1535_end_mask_0 = const()[name = string("op_1535_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1535_cast_fp16 = slice_by_index(begin = var_1535_begin_0, end = var_1535_end_0, end_mask = var_1535_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1535_cast_fp16")]; + tensor var_1547_begin_0 = const()[name = string("op_1547_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1547_end_0 = const()[name = string("op_1547_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_1547_end_mask_0 = const()[name = string("op_1547_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1547_cast_fp16 = slice_by_index(begin = var_1547_begin_0, end = var_1547_end_0, end_mask = var_1547_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1547_cast_fp16")]; + tensor var_1551_begin_0 = const()[name = string("op_1551_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1551_end_0 = const()[name = string("op_1551_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_1551_end_mask_0 = const()[name = string("op_1551_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1551_cast_fp16 = slice_by_index(begin = var_1551_begin_0, end = var_1551_end_0, end_mask = var_1551_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1551_cast_fp16")]; + tensor var_1563_begin_0 = const()[name = string("op_1563_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1563_end_0 = const()[name = string("op_1563_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_1563_end_mask_0 = const()[name = string("op_1563_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1563_cast_fp16 = slice_by_index(begin = var_1563_begin_0, end = var_1563_end_0, end_mask = var_1563_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1563_cast_fp16")]; + tensor var_1567_begin_0 = const()[name = string("op_1567_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1567_end_0 = const()[name = string("op_1567_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_1567_end_mask_0 = const()[name = string("op_1567_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1567_cast_fp16 = slice_by_index(begin = var_1567_begin_0, end = var_1567_end_0, end_mask = var_1567_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1567_cast_fp16")]; + tensor var_1579_begin_0 = const()[name = string("op_1579_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1579_end_0 = const()[name = string("op_1579_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_1579_end_mask_0 = const()[name = string("op_1579_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1579_cast_fp16 = slice_by_index(begin = var_1579_begin_0, end = var_1579_end_0, end_mask = var_1579_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1579_cast_fp16")]; + tensor var_1583_begin_0 = const()[name = string("op_1583_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1583_end_0 = const()[name = string("op_1583_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_1583_end_mask_0 = const()[name = string("op_1583_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1583_cast_fp16 = slice_by_index(begin = var_1583_begin_0, end = var_1583_end_0, end_mask = var_1583_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1583_cast_fp16")]; + tensor var_1595_begin_0 = const()[name = string("op_1595_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1595_end_0 = const()[name = string("op_1595_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_1595_end_mask_0 = const()[name = string("op_1595_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1595_cast_fp16 = slice_by_index(begin = var_1595_begin_0, end = var_1595_end_0, end_mask = var_1595_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1595_cast_fp16")]; + tensor var_1599_begin_0 = const()[name = string("op_1599_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1599_end_0 = const()[name = string("op_1599_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_1599_end_mask_0 = const()[name = string("op_1599_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1599_cast_fp16 = slice_by_index(begin = var_1599_begin_0, end = var_1599_end_0, end_mask = var_1599_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1599_cast_fp16")]; + tensor var_1611_begin_0 = const()[name = string("op_1611_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1611_end_0 = const()[name = string("op_1611_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1611_end_mask_0 = const()[name = string("op_1611_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1611_cast_fp16 = slice_by_index(begin = var_1611_begin_0, end = var_1611_end_0, end_mask = var_1611_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1611_cast_fp16")]; + tensor var_1615_begin_0 = const()[name = string("op_1615_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1615_end_0 = const()[name = string("op_1615_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1615_end_mask_0 = const()[name = string("op_1615_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1615_cast_fp16 = slice_by_index(begin = var_1615_begin_0, end = var_1615_end_0, end_mask = var_1615_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1615_cast_fp16")]; + bool key_heads_15_interleave_0 = const()[name = string("key_heads_15_interleave_0"), val = bool(false)]; + tensor key_heads_15_cast_fp16 = concat(axis = var_1341, interleave = key_heads_15_interleave_0, values = (var_1499_cast_fp16, var_1499_cast_fp16, var_1515_cast_fp16, var_1515_cast_fp16, var_1531_cast_fp16, var_1531_cast_fp16, var_1547_cast_fp16, var_1547_cast_fp16, var_1563_cast_fp16, var_1563_cast_fp16, var_1579_cast_fp16, var_1579_cast_fp16, var_1595_cast_fp16, var_1595_cast_fp16, var_1611_cast_fp16, var_1611_cast_fp16))[name = string("key_heads_15_cast_fp16")]; + bool value_heads_15_interleave_0 = const()[name = string("value_heads_15_interleave_0"), val = bool(false)]; + tensor value_heads_15_cast_fp16 = concat(axis = var_1341, interleave = value_heads_15_interleave_0, values = (var_1503_cast_fp16, var_1503_cast_fp16, var_1519_cast_fp16, var_1519_cast_fp16, var_1535_cast_fp16, var_1535_cast_fp16, var_1551_cast_fp16, var_1551_cast_fp16, var_1567_cast_fp16, var_1567_cast_fp16, var_1583_cast_fp16, var_1583_cast_fp16, var_1599_cast_fp16, var_1599_cast_fp16, var_1615_cast_fp16, var_1615_cast_fp16))[name = string("value_heads_15_cast_fp16")]; + fp16 var_1638_to_fp16 = const()[name = string("op_1638_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1639_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1638_to_fp16)[name = string("op_1639_cast_fp16")]; + bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; + bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1639_cast_fp16, y = key_heads_15_cast_fp16)[name = string("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_15_cast_fp16")]; + tensor var_1651_cast_fp16 = softmax(axis = var_1323, x = mh_w_15_cast_fp16)[name = string("op_1651_cast_fp16")]; + bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; + bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = value_heads_15_cast_fp16, y = var_1651_cast_fp16)[name = string("attn_7_cast_fp16")]; + tensor var_1656 = const()[name = string("op_1656"), val = tensor([1, -1, 1, 1])]; + tensor input_25_cast_fp16 = reshape(shape = var_1656, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")]; + string obj_35_pad_type_0 = const()[name = string("obj_35_pad_type_0"), val = string("valid")]; + tensor obj_35_strides_0 = const()[name = string("obj_35_strides_0"), val = tensor([1, 1])]; + tensor obj_35_pad_0 = const()[name = string("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_35_dilations_0 = const()[name = string("obj_35_dilations_0"), val = tensor([1, 1])]; + int32 obj_35_groups_0 = const()[name = string("obj_35_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51486464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53583680))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_35_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("obj_35_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_23_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_29_cast_fp16")]; + tensor inputs_sq_31_cast_fp16 = mul(x = inputs_29_cast_fp16, y = inputs_29_cast_fp16)[name = string("inputs_sq_31_cast_fp16")]; + tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([1])]; + bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = inputs_sq_31_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1674_to_fp16 = const()[name = string("op_1674_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1675_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1674_to_fp16)[name = string("op_1675_cast_fp16")]; + fp32 var_1676_epsilon_0 = const()[name = string("op_1676_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1676_cast_fp16 = rsqrt(epsilon = var_1676_epsilon_0, x = var_1675_cast_fp16)[name = string("op_1676_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = mul(x = inputs_29_cast_fp16, y = var_1676_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor w_31_to_fp16 = const()[name = string("w_31_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53584256)))]; + tensor input_27_cast_fp16 = mul(x = w_31_to_fp16, y = hidden_states_37_cast_fp16)[name = string("input_27_cast_fp16")]; + string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; + tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; + tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; + int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; + tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53586368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56732160))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_1690_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_1690_cast_fp16")]; + string var_1696_pad_type_0 = const()[name = string("op_1696_pad_type_0"), val = string("valid")]; + tensor var_1696_strides_0 = const()[name = string("op_1696_strides_0"), val = tensor([1, 1])]; + tensor var_1696_pad_0 = const()[name = string("op_1696_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1696_dilations_0 = const()[name = string("op_1696_dilations_0"), val = tensor([1, 1])]; + int32 var_1696_groups_0 = const()[name = string("op_1696_groups_0"), val = int32(1)]; + tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56732736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59878528))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_1696_cast_fp16 = conv(dilations = var_1696_dilations_0, groups = var_1696_groups_0, pad = var_1696_pad_0, pad_type = var_1696_pad_type_0, strides = var_1696_strides_0, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_1696_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_1690_cast_fp16, y = var_1696_cast_fp16)[name = string("input_31_cast_fp16")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59879104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63024896))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("inputs_31_cast_fp16")]; + int32 var_1710 = const()[name = string("op_1710"), val = int32(3)]; + int32 var_1720 = const()[name = string("op_1720"), val = int32(-2)]; + int32 var_1728 = const()[name = string("op_1728"), val = int32(1)]; + tensor inputs_sq_33_cast_fp16 = mul(x = inputs_31_cast_fp16, y = inputs_31_cast_fp16)[name = string("inputs_sq_33_cast_fp16")]; + tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([1])]; + bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = inputs_sq_33_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1740_to_fp16 = const()[name = string("op_1740_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1741_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1740_to_fp16)[name = string("op_1741_cast_fp16")]; + fp32 var_1742_epsilon_0 = const()[name = string("op_1742_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1742_cast_fp16 = rsqrt(epsilon = var_1742_epsilon_0, x = var_1741_cast_fp16)[name = string("op_1742_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = mul(x = inputs_31_cast_fp16, y = var_1742_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor w_33_to_fp16 = const()[name = string("w_33_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63025472)))]; + tensor obj_37_cast_fp16 = mul(x = w_33_to_fp16, y = hidden_states_41_cast_fp16)[name = string("obj_37_cast_fp16")]; + string query_25_pad_type_0 = const()[name = string("query_25_pad_type_0"), val = string("valid")]; + tensor query_25_strides_0 = const()[name = string("query_25_strides_0"), val = tensor([1, 1])]; + tensor query_25_pad_0 = const()[name = string("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_25_dilations_0 = const()[name = string("query_25_dilations_0"), val = tensor([1, 1])]; + int32 query_25_groups_0 = const()[name = string("query_25_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63027584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65124800))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_25_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_25_dilations_0, groups = query_25_groups_0, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = query_25_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("query_25_cast_fp16")]; + string current_key_17_pad_type_0 = const()[name = string("current_key_17_pad_type_0"), val = string("valid")]; + tensor current_key_17_strides_0 = const()[name = string("current_key_17_strides_0"), val = tensor([1, 1])]; + tensor current_key_17_pad_0 = const()[name = string("current_key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_17_dilations_0 = const()[name = string("current_key_17_dilations_0"), val = tensor([1, 1])]; + int32 current_key_17_groups_0 = const()[name = string("current_key_17_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65125376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66174016))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_17_cast_fp16 = conv(dilations = current_key_17_dilations_0, groups = current_key_17_groups_0, pad = current_key_17_pad_0, pad_type = current_key_17_pad_type_0, strides = current_key_17_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("current_key_17_cast_fp16")]; + string current_value_9_pad_type_0 = const()[name = string("current_value_9_pad_type_0"), val = string("valid")]; + tensor current_value_9_strides_0 = const()[name = string("current_value_9_strides_0"), val = tensor([1, 1])]; + tensor current_value_9_pad_0 = const()[name = string("current_value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_9_dilations_0 = const()[name = string("current_value_9_dilations_0"), val = tensor([1, 1])]; + int32 current_value_9_groups_0 = const()[name = string("current_value_9_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66174592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67223232))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_9_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_9_dilations_0, groups = current_value_9_groups_0, pad = current_value_9_pad_0, pad_type = current_value_9_pad_type_0, strides = current_value_9_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("current_value_9_cast_fp16")]; + tensor var_1779 = const()[name = string("op_1779"), val = tensor([16, 128, 1, 1])]; + tensor inputs_33_cast_fp16 = reshape(shape = var_1779, x = query_25_cast_fp16)[name = string("inputs_33_cast_fp16")]; + tensor inputs_sq_35_cast_fp16 = mul(x = inputs_33_cast_fp16, y = inputs_33_cast_fp16)[name = string("inputs_sq_35_cast_fp16")]; + tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([1])]; + bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = inputs_sq_35_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1785_to_fp16 = const()[name = string("op_1785_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1786_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1785_to_fp16)[name = string("op_1786_cast_fp16")]; + fp32 var_1787_epsilon_0 = const()[name = string("op_1787_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1787_cast_fp16 = rsqrt(epsilon = var_1787_epsilon_0, x = var_1786_cast_fp16)[name = string("op_1787_cast_fp16")]; + tensor hidden_states_43_cast_fp16 = mul(x = inputs_33_cast_fp16, y = var_1787_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; + tensor w_35_to_fp16 = const()[name = string("w_35_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67223808)))]; + tensor query_normed_9_cast_fp16 = mul(x = w_35_to_fp16, y = hidden_states_43_cast_fp16)[name = string("query_normed_9_cast_fp16")]; + tensor var_1795 = const()[name = string("op_1795"), val = tensor([8, 128, 1, 1])]; + tensor inputs_35_cast_fp16 = reshape(shape = var_1795, x = current_key_17_cast_fp16)[name = string("inputs_35_cast_fp16")]; + tensor inputs_sq_37_cast_fp16 = mul(x = inputs_35_cast_fp16, y = inputs_35_cast_fp16)[name = string("inputs_sq_37_cast_fp16")]; + tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([1])]; + bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = inputs_sq_37_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1801_to_fp16 = const()[name = string("op_1801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1802_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1801_to_fp16)[name = string("op_1802_cast_fp16")]; + fp32 var_1803_epsilon_0 = const()[name = string("op_1803_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1803_cast_fp16 = rsqrt(epsilon = var_1803_epsilon_0, x = var_1802_cast_fp16)[name = string("op_1803_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = mul(x = inputs_35_cast_fp16, y = var_1803_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor w_37_to_fp16 = const()[name = string("w_37_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67224128)))]; + tensor current_key_normed_9_cast_fp16 = mul(x = w_37_to_fp16, y = hidden_states_45_cast_fp16)[name = string("current_key_normed_9_cast_fp16")]; + tensor var_1821 = const()[name = string("op_1821"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_1821, x = query_normed_9_cast_fp16)[name = string("mh_q_25_cast_fp16")]; + tensor var_1823 = const()[name = string("op_1823"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_17_cast_fp16 = reshape(shape = var_1823, x = current_key_normed_9_cast_fp16)[name = string("mh_k_17_cast_fp16")]; + tensor var_1827_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1827_cast_fp16")]; + tensor var_1832_begin_0 = const()[name = string("op_1832_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_end_0 = const()[name = string("op_1832_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1832_end_mask_0 = const()[name = string("op_1832_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1832_cast_fp16 = slice_by_index(begin = var_1832_begin_0, end = var_1832_end_0, end_mask = var_1832_end_mask_0, x = mh_q_25_cast_fp16)[name = string("op_1832_cast_fp16")]; + tensor var_1838_begin_0 = const()[name = string("op_1838_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1838_end_0 = const()[name = string("op_1838_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1838_end_mask_0 = const()[name = string("op_1838_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1838_cast_fp16 = slice_by_index(begin = var_1838_begin_0, end = var_1838_end_0, end_mask = var_1838_end_mask_0, x = mh_q_25_cast_fp16)[name = string("op_1838_cast_fp16")]; + fp16 const_109_promoted_to_fp16 = const()[name = string("const_109_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1840_cast_fp16 = mul(x = var_1838_cast_fp16, y = const_109_promoted_to_fp16)[name = string("op_1840_cast_fp16")]; + bool var_1842_interleave_0 = const()[name = string("op_1842_interleave_0"), val = bool(false)]; + tensor var_1842_cast_fp16 = concat(axis = var_1720, interleave = var_1842_interleave_0, values = (var_1840_cast_fp16, var_1832_cast_fp16))[name = string("op_1842_cast_fp16")]; + tensor var_1843_cast_fp16 = mul(x = var_1842_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1843_cast_fp16")]; + tensor mh_q_27_cast_fp16 = add(x = var_1827_cast_fp16, y = var_1843_cast_fp16)[name = string("mh_q_27_cast_fp16")]; + tensor var_1845_cast_fp16 = mul(x = mh_k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1845_cast_fp16")]; + tensor var_1850_begin_0 = const()[name = string("op_1850_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1850_end_0 = const()[name = string("op_1850_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1850_end_mask_0 = const()[name = string("op_1850_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1850_cast_fp16 = slice_by_index(begin = var_1850_begin_0, end = var_1850_end_0, end_mask = var_1850_end_mask_0, x = mh_k_17_cast_fp16)[name = string("op_1850_cast_fp16")]; + tensor var_1856_begin_0 = const()[name = string("op_1856_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1856_end_0 = const()[name = string("op_1856_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1856_end_mask_0 = const()[name = string("op_1856_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1856_cast_fp16 = slice_by_index(begin = var_1856_begin_0, end = var_1856_end_0, end_mask = var_1856_end_mask_0, x = mh_k_17_cast_fp16)[name = string("op_1856_cast_fp16")]; + fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1858_cast_fp16 = mul(x = var_1856_cast_fp16, y = const_112_promoted_to_fp16)[name = string("op_1858_cast_fp16")]; + bool var_1860_interleave_0 = const()[name = string("op_1860_interleave_0"), val = bool(false)]; + tensor var_1860_cast_fp16 = concat(axis = var_1720, interleave = var_1860_interleave_0, values = (var_1858_cast_fp16, var_1850_cast_fp16))[name = string("op_1860_cast_fp16")]; + tensor var_1861_cast_fp16 = mul(x = var_1860_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1861_cast_fp16")]; + tensor mh_k_19_cast_fp16 = add(x = var_1845_cast_fp16, y = var_1861_cast_fp16)[name = string("mh_k_19_cast_fp16")]; + tensor var_1865 = const()[name = string("op_1865"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_19_cast_fp16 = reshape(shape = var_1865, x = mh_k_19_cast_fp16)[name = string("current_key_19_cast_fp16")]; + tensor var_1872_cast_fp16 = mul(x = var_101_cast_fp16_4, y = var_323_cast_fp16)[name = string("op_1872_cast_fp16")]; + tensor var_1873_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_321_cast_fp16)[name = string("op_1873_cast_fp16")]; + tensor key_27_cast_fp16 = add(x = var_1872_cast_fp16, y = var_1873_cast_fp16)[name = string("key_27_cast_fp16")]; + tensor var_1876_cast_fp16 = mul(x = var_132_cast_fp16_4, y = var_323_cast_fp16)[name = string("op_1876_cast_fp16")]; + tensor var_1877_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_321_cast_fp16)[name = string("op_1877_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1876_cast_fp16, y = var_1877_cast_fp16)[name = string("value_17_cast_fp16")]; + tensor var_1881 = const()[name = string("op_1881"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_17_cast_fp16 = reshape(shape = var_1881, x = key_27_cast_fp16)[name = string("key_heads_17_cast_fp16")]; + tensor var_1883 = const()[name = string("op_1883"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_17_cast_fp16 = reshape(shape = var_1883, x = value_17_cast_fp16)[name = string("value_heads_17_cast_fp16")]; + tensor var_1886_begin_0 = const()[name = string("op_1886_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1886_end_0 = const()[name = string("op_1886_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1886_end_mask_0 = const()[name = string("op_1886_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1886_cast_fp16 = slice_by_index(begin = var_1886_begin_0, end = var_1886_end_0, end_mask = var_1886_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1886_cast_fp16")]; + tensor var_1890_begin_0 = const()[name = string("op_1890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1890_end_0 = const()[name = string("op_1890_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1890_end_mask_0 = const()[name = string("op_1890_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1890_cast_fp16 = slice_by_index(begin = var_1890_begin_0, end = var_1890_end_0, end_mask = var_1890_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1890_cast_fp16")]; + tensor var_1902_begin_0 = const()[name = string("op_1902_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1902_end_0 = const()[name = string("op_1902_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_1902_end_mask_0 = const()[name = string("op_1902_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1902_cast_fp16 = slice_by_index(begin = var_1902_begin_0, end = var_1902_end_0, end_mask = var_1902_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1902_cast_fp16")]; + tensor var_1906_begin_0 = const()[name = string("op_1906_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1906_end_0 = const()[name = string("op_1906_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_1906_end_mask_0 = const()[name = string("op_1906_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1906_cast_fp16 = slice_by_index(begin = var_1906_begin_0, end = var_1906_end_0, end_mask = var_1906_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1906_cast_fp16")]; + tensor var_1918_begin_0 = const()[name = string("op_1918_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1918_end_0 = const()[name = string("op_1918_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_1918_end_mask_0 = const()[name = string("op_1918_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1918_cast_fp16 = slice_by_index(begin = var_1918_begin_0, end = var_1918_end_0, end_mask = var_1918_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1918_cast_fp16")]; + tensor var_1922_begin_0 = const()[name = string("op_1922_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1922_end_0 = const()[name = string("op_1922_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_1922_end_mask_0 = const()[name = string("op_1922_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1922_cast_fp16 = slice_by_index(begin = var_1922_begin_0, end = var_1922_end_0, end_mask = var_1922_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1922_cast_fp16")]; + tensor var_1934_begin_0 = const()[name = string("op_1934_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1934_end_0 = const()[name = string("op_1934_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_1934_end_mask_0 = const()[name = string("op_1934_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1934_cast_fp16 = slice_by_index(begin = var_1934_begin_0, end = var_1934_end_0, end_mask = var_1934_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1934_cast_fp16")]; + tensor var_1938_begin_0 = const()[name = string("op_1938_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1938_end_0 = const()[name = string("op_1938_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_1938_end_mask_0 = const()[name = string("op_1938_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1938_cast_fp16 = slice_by_index(begin = var_1938_begin_0, end = var_1938_end_0, end_mask = var_1938_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1938_cast_fp16")]; + tensor var_1950_begin_0 = const()[name = string("op_1950_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1950_end_0 = const()[name = string("op_1950_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_1950_end_mask_0 = const()[name = string("op_1950_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1950_cast_fp16 = slice_by_index(begin = var_1950_begin_0, end = var_1950_end_0, end_mask = var_1950_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1950_cast_fp16")]; + tensor var_1954_begin_0 = const()[name = string("op_1954_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1954_end_0 = const()[name = string("op_1954_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_1954_end_mask_0 = const()[name = string("op_1954_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1954_cast_fp16 = slice_by_index(begin = var_1954_begin_0, end = var_1954_end_0, end_mask = var_1954_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1954_cast_fp16")]; + tensor var_1966_begin_0 = const()[name = string("op_1966_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1966_end_0 = const()[name = string("op_1966_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_1966_end_mask_0 = const()[name = string("op_1966_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1966_cast_fp16 = slice_by_index(begin = var_1966_begin_0, end = var_1966_end_0, end_mask = var_1966_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1966_cast_fp16")]; + tensor var_1970_begin_0 = const()[name = string("op_1970_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1970_end_0 = const()[name = string("op_1970_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_1970_end_mask_0 = const()[name = string("op_1970_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1970_cast_fp16 = slice_by_index(begin = var_1970_begin_0, end = var_1970_end_0, end_mask = var_1970_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1970_cast_fp16")]; + tensor var_1982_begin_0 = const()[name = string("op_1982_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1982_end_0 = const()[name = string("op_1982_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_1982_end_mask_0 = const()[name = string("op_1982_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1982_cast_fp16 = slice_by_index(begin = var_1982_begin_0, end = var_1982_end_0, end_mask = var_1982_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1982_cast_fp16")]; + tensor var_1986_begin_0 = const()[name = string("op_1986_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1986_end_0 = const()[name = string("op_1986_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_1986_end_mask_0 = const()[name = string("op_1986_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1986_cast_fp16 = slice_by_index(begin = var_1986_begin_0, end = var_1986_end_0, end_mask = var_1986_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1986_cast_fp16")]; + tensor var_1998_begin_0 = const()[name = string("op_1998_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1998_end_0 = const()[name = string("op_1998_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_1998_end_mask_0 = const()[name = string("op_1998_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1998_cast_fp16 = slice_by_index(begin = var_1998_begin_0, end = var_1998_end_0, end_mask = var_1998_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1998_cast_fp16")]; + tensor var_2002_begin_0 = const()[name = string("op_2002_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2002_end_0 = const()[name = string("op_2002_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2002_end_mask_0 = const()[name = string("op_2002_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2002_cast_fp16 = slice_by_index(begin = var_2002_begin_0, end = var_2002_end_0, end_mask = var_2002_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_2002_cast_fp16")]; + bool key_heads_19_interleave_0 = const()[name = string("key_heads_19_interleave_0"), val = bool(false)]; + tensor key_heads_19_cast_fp16 = concat(axis = var_1728, interleave = key_heads_19_interleave_0, values = (var_1886_cast_fp16, var_1886_cast_fp16, var_1902_cast_fp16, var_1902_cast_fp16, var_1918_cast_fp16, var_1918_cast_fp16, var_1934_cast_fp16, var_1934_cast_fp16, var_1950_cast_fp16, var_1950_cast_fp16, var_1966_cast_fp16, var_1966_cast_fp16, var_1982_cast_fp16, var_1982_cast_fp16, var_1998_cast_fp16, var_1998_cast_fp16))[name = string("key_heads_19_cast_fp16")]; + bool value_heads_19_interleave_0 = const()[name = string("value_heads_19_interleave_0"), val = bool(false)]; + tensor value_heads_19_cast_fp16 = concat(axis = var_1728, interleave = value_heads_19_interleave_0, values = (var_1890_cast_fp16, var_1890_cast_fp16, var_1906_cast_fp16, var_1906_cast_fp16, var_1922_cast_fp16, var_1922_cast_fp16, var_1938_cast_fp16, var_1938_cast_fp16, var_1954_cast_fp16, var_1954_cast_fp16, var_1970_cast_fp16, var_1970_cast_fp16, var_1986_cast_fp16, var_1986_cast_fp16, var_2002_cast_fp16, var_2002_cast_fp16))[name = string("value_heads_19_cast_fp16")]; + fp16 var_2025_to_fp16 = const()[name = string("op_2025_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_2026_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_2025_to_fp16)[name = string("op_2026_cast_fp16")]; + bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; + bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_2026_cast_fp16, y = key_heads_19_cast_fp16)[name = string("mh_w_17_cast_fp16")]; + tensor mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_19_cast_fp16")]; + tensor var_2038_cast_fp16 = softmax(axis = var_1710, x = mh_w_19_cast_fp16)[name = string("op_2038_cast_fp16")]; + bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)]; + bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = value_heads_19_cast_fp16, y = var_2038_cast_fp16)[name = string("attn_9_cast_fp16")]; + tensor var_2043 = const()[name = string("op_2043"), val = tensor([1, -1, 1, 1])]; + tensor input_33_cast_fp16 = reshape(shape = var_2043, x = attn_9_cast_fp16)[name = string("input_33_cast_fp16")]; + string obj_43_pad_type_0 = const()[name = string("obj_43_pad_type_0"), val = string("valid")]; + tensor obj_43_strides_0 = const()[name = string("obj_43_strides_0"), val = tensor([1, 1])]; + tensor obj_43_pad_0 = const()[name = string("obj_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_43_dilations_0 = const()[name = string("obj_43_dilations_0"), val = tensor([1, 1])]; + int32 obj_43_groups_0 = const()[name = string("obj_43_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67224448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69321664))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_43_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_43_dilations_0, groups = obj_43_groups_0, pad = obj_43_pad_0, pad_type = obj_43_pad_type_0, strides = obj_43_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("obj_43_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_43_cast_fp16)[name = string("inputs_37_cast_fp16")]; + tensor inputs_sq_39_cast_fp16 = mul(x = inputs_37_cast_fp16, y = inputs_37_cast_fp16)[name = string("inputs_sq_39_cast_fp16")]; + tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([1])]; + bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = inputs_sq_39_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_2061_to_fp16 = const()[name = string("op_2061_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2062_cast_fp16 = add(x = variance_39_cast_fp16, y = var_2061_to_fp16)[name = string("op_2062_cast_fp16")]; + fp32 var_2063_epsilon_0 = const()[name = string("op_2063_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2063_cast_fp16 = rsqrt(epsilon = var_2063_epsilon_0, x = var_2062_cast_fp16)[name = string("op_2063_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = mul(x = inputs_37_cast_fp16, y = var_2063_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + tensor w_39_to_fp16 = const()[name = string("w_39_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69322240)))]; + tensor input_35_cast_fp16 = mul(x = w_39_to_fp16, y = hidden_states_47_cast_fp16)[name = string("input_35_cast_fp16")]; + string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; + tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; + tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; + int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; + tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69324352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72470144))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")]; + tensor var_2077_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_2077_cast_fp16")]; + string var_2083_pad_type_0 = const()[name = string("op_2083_pad_type_0"), val = string("valid")]; + tensor var_2083_strides_0 = const()[name = string("op_2083_strides_0"), val = tensor([1, 1])]; + tensor var_2083_pad_0 = const()[name = string("op_2083_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2083_dilations_0 = const()[name = string("op_2083_dilations_0"), val = tensor([1, 1])]; + int32 var_2083_groups_0 = const()[name = string("op_2083_groups_0"), val = int32(1)]; + tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72470720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75616512))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_2083_cast_fp16 = conv(dilations = var_2083_dilations_0, groups = var_2083_groups_0, pad = var_2083_pad_0, pad_type = var_2083_pad_type_0, strides = var_2083_strides_0, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_2083_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_2077_cast_fp16, y = var_2083_cast_fp16)[name = string("input_39_cast_fp16")]; + string hidden_states_49_pad_type_0 = const()[name = string("hidden_states_49_pad_type_0"), val = string("valid")]; + tensor hidden_states_49_strides_0 = const()[name = string("hidden_states_49_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_49_pad_0 = const()[name = string("hidden_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_49_dilations_0 = const()[name = string("hidden_states_49_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_49_groups_0 = const()[name = string("hidden_states_49_groups_0"), val = int32(1)]; + tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75617088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78762880))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_49_cast_fp16 = conv(dilations = hidden_states_49_dilations_0, groups = hidden_states_49_groups_0, pad = hidden_states_49_pad_0, pad_type = hidden_states_49_pad_type_0, strides = hidden_states_49_strides_0, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("inputs_39_cast_fp16")]; + int32 var_2097 = const()[name = string("op_2097"), val = int32(3)]; + int32 var_2107 = const()[name = string("op_2107"), val = int32(-2)]; + int32 var_2115 = const()[name = string("op_2115"), val = int32(1)]; + tensor inputs_sq_41_cast_fp16 = mul(x = inputs_39_cast_fp16, y = inputs_39_cast_fp16)[name = string("inputs_sq_41_cast_fp16")]; + tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([1])]; + bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; + tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = inputs_sq_41_cast_fp16)[name = string("variance_41_cast_fp16")]; + fp16 var_2127_to_fp16 = const()[name = string("op_2127_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2128_cast_fp16 = add(x = variance_41_cast_fp16, y = var_2127_to_fp16)[name = string("op_2128_cast_fp16")]; + fp32 var_2129_epsilon_0 = const()[name = string("op_2129_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2129_cast_fp16 = rsqrt(epsilon = var_2129_epsilon_0, x = var_2128_cast_fp16)[name = string("op_2129_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = mul(x = inputs_39_cast_fp16, y = var_2129_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor w_41_to_fp16 = const()[name = string("w_41_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78763456)))]; + tensor obj_45_cast_fp16 = mul(x = w_41_to_fp16, y = hidden_states_51_cast_fp16)[name = string("obj_45_cast_fp16")]; + string query_31_pad_type_0 = const()[name = string("query_31_pad_type_0"), val = string("valid")]; + tensor query_31_strides_0 = const()[name = string("query_31_strides_0"), val = tensor([1, 1])]; + tensor query_31_pad_0 = const()[name = string("query_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_31_dilations_0 = const()[name = string("query_31_dilations_0"), val = tensor([1, 1])]; + int32 query_31_groups_0 = const()[name = string("query_31_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78765568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80862784))))[name = string("layers_5_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_31_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_31_dilations_0, groups = query_31_groups_0, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = query_31_strides_0, weight = layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("query_31_cast_fp16")]; + string current_key_21_pad_type_0 = const()[name = string("current_key_21_pad_type_0"), val = string("valid")]; + tensor current_key_21_strides_0 = const()[name = string("current_key_21_strides_0"), val = tensor([1, 1])]; + tensor current_key_21_pad_0 = const()[name = string("current_key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_21_dilations_0 = const()[name = string("current_key_21_dilations_0"), val = tensor([1, 1])]; + int32 current_key_21_groups_0 = const()[name = string("current_key_21_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80863360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81912000))))[name = string("layers_5_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_21_cast_fp16 = conv(dilations = current_key_21_dilations_0, groups = current_key_21_groups_0, pad = current_key_21_pad_0, pad_type = current_key_21_pad_type_0, strides = current_key_21_strides_0, weight = layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("current_key_21_cast_fp16")]; + string current_value_11_pad_type_0 = const()[name = string("current_value_11_pad_type_0"), val = string("valid")]; + tensor current_value_11_strides_0 = const()[name = string("current_value_11_strides_0"), val = tensor([1, 1])]; + tensor current_value_11_pad_0 = const()[name = string("current_value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_11_dilations_0 = const()[name = string("current_value_11_dilations_0"), val = tensor([1, 1])]; + int32 current_value_11_groups_0 = const()[name = string("current_value_11_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81912576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82961216))))[name = string("layers_5_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_11_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_11_dilations_0, groups = current_value_11_groups_0, pad = current_value_11_pad_0, pad_type = current_value_11_pad_type_0, strides = current_value_11_strides_0, weight = layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("current_value_11_cast_fp16")]; + tensor var_2166 = const()[name = string("op_2166"), val = tensor([16, 128, 1, 1])]; + tensor inputs_41_cast_fp16 = reshape(shape = var_2166, x = query_31_cast_fp16)[name = string("inputs_41_cast_fp16")]; + tensor inputs_sq_43_cast_fp16 = mul(x = inputs_41_cast_fp16, y = inputs_41_cast_fp16)[name = string("inputs_sq_43_cast_fp16")]; + tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([1])]; + bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; + tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = inputs_sq_43_cast_fp16)[name = string("variance_43_cast_fp16")]; + fp16 var_2172_to_fp16 = const()[name = string("op_2172_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2173_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2172_to_fp16)[name = string("op_2173_cast_fp16")]; + fp32 var_2174_epsilon_0 = const()[name = string("op_2174_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2174_cast_fp16 = rsqrt(epsilon = var_2174_epsilon_0, x = var_2173_cast_fp16)[name = string("op_2174_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = mul(x = inputs_41_cast_fp16, y = var_2174_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor w_43_to_fp16 = const()[name = string("w_43_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82961792)))]; + tensor query_normed_11_cast_fp16 = mul(x = w_43_to_fp16, y = hidden_states_53_cast_fp16)[name = string("query_normed_11_cast_fp16")]; + tensor var_2182 = const()[name = string("op_2182"), val = tensor([8, 128, 1, 1])]; + tensor inputs_43_cast_fp16 = reshape(shape = var_2182, x = current_key_21_cast_fp16)[name = string("inputs_43_cast_fp16")]; + tensor inputs_sq_45_cast_fp16 = mul(x = inputs_43_cast_fp16, y = inputs_43_cast_fp16)[name = string("inputs_sq_45_cast_fp16")]; + tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([1])]; + bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; + tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = inputs_sq_45_cast_fp16)[name = string("variance_45_cast_fp16")]; + fp16 var_2188_to_fp16 = const()[name = string("op_2188_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2189_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2188_to_fp16)[name = string("op_2189_cast_fp16")]; + fp32 var_2190_epsilon_0 = const()[name = string("op_2190_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2190_cast_fp16 = rsqrt(epsilon = var_2190_epsilon_0, x = var_2189_cast_fp16)[name = string("op_2190_cast_fp16")]; + tensor hidden_states_55_cast_fp16 = mul(x = inputs_43_cast_fp16, y = var_2190_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; + tensor w_45_to_fp16 = const()[name = string("w_45_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82962112)))]; + tensor current_key_normed_11_cast_fp16 = mul(x = w_45_to_fp16, y = hidden_states_55_cast_fp16)[name = string("current_key_normed_11_cast_fp16")]; + tensor var_2208 = const()[name = string("op_2208"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_2208, x = query_normed_11_cast_fp16)[name = string("mh_q_31_cast_fp16")]; + tensor var_2210 = const()[name = string("op_2210"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_21_cast_fp16 = reshape(shape = var_2210, x = current_key_normed_11_cast_fp16)[name = string("mh_k_21_cast_fp16")]; + tensor var_2214_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2214_cast_fp16")]; + tensor var_2219_begin_0 = const()[name = string("op_2219_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2219_end_0 = const()[name = string("op_2219_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_2219_end_mask_0 = const()[name = string("op_2219_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2219_cast_fp16 = slice_by_index(begin = var_2219_begin_0, end = var_2219_end_0, end_mask = var_2219_end_mask_0, x = mh_q_31_cast_fp16)[name = string("op_2219_cast_fp16")]; + tensor var_2225_begin_0 = const()[name = string("op_2225_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2225_end_0 = const()[name = string("op_2225_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_2225_end_mask_0 = const()[name = string("op_2225_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2225_cast_fp16 = slice_by_index(begin = var_2225_begin_0, end = var_2225_end_0, end_mask = var_2225_end_mask_0, x = mh_q_31_cast_fp16)[name = string("op_2225_cast_fp16")]; + fp16 const_132_promoted_to_fp16 = const()[name = string("const_132_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2227_cast_fp16 = mul(x = var_2225_cast_fp16, y = const_132_promoted_to_fp16)[name = string("op_2227_cast_fp16")]; + bool var_2229_interleave_0 = const()[name = string("op_2229_interleave_0"), val = bool(false)]; + tensor var_2229_cast_fp16 = concat(axis = var_2107, interleave = var_2229_interleave_0, values = (var_2227_cast_fp16, var_2219_cast_fp16))[name = string("op_2229_cast_fp16")]; + tensor var_2230_cast_fp16 = mul(x = var_2229_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2230_cast_fp16")]; + tensor mh_q_33_cast_fp16 = add(x = var_2214_cast_fp16, y = var_2230_cast_fp16)[name = string("mh_q_33_cast_fp16")]; + tensor var_2232_cast_fp16 = mul(x = mh_k_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2232_cast_fp16")]; + tensor var_2237_begin_0 = const()[name = string("op_2237_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2237_end_0 = const()[name = string("op_2237_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_2237_end_mask_0 = const()[name = string("op_2237_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2237_cast_fp16 = slice_by_index(begin = var_2237_begin_0, end = var_2237_end_0, end_mask = var_2237_end_mask_0, x = mh_k_21_cast_fp16)[name = string("op_2237_cast_fp16")]; + tensor var_2243_begin_0 = const()[name = string("op_2243_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2243_end_0 = const()[name = string("op_2243_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_2243_end_mask_0 = const()[name = string("op_2243_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2243_cast_fp16 = slice_by_index(begin = var_2243_begin_0, end = var_2243_end_0, end_mask = var_2243_end_mask_0, x = mh_k_21_cast_fp16)[name = string("op_2243_cast_fp16")]; + fp16 const_135_promoted_to_fp16 = const()[name = string("const_135_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2245_cast_fp16 = mul(x = var_2243_cast_fp16, y = const_135_promoted_to_fp16)[name = string("op_2245_cast_fp16")]; + bool var_2247_interleave_0 = const()[name = string("op_2247_interleave_0"), val = bool(false)]; + tensor var_2247_cast_fp16 = concat(axis = var_2107, interleave = var_2247_interleave_0, values = (var_2245_cast_fp16, var_2237_cast_fp16))[name = string("op_2247_cast_fp16")]; + tensor var_2248_cast_fp16 = mul(x = var_2247_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2248_cast_fp16")]; + tensor mh_k_23_cast_fp16 = add(x = var_2232_cast_fp16, y = var_2248_cast_fp16)[name = string("mh_k_23_cast_fp16")]; + tensor var_2252 = const()[name = string("op_2252"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_23_cast_fp16 = reshape(shape = var_2252, x = mh_k_23_cast_fp16)[name = string("current_key_23_cast_fp16")]; + tensor var_2259_cast_fp16 = mul(x = var_101_cast_fp16_5, y = var_323_cast_fp16)[name = string("op_2259_cast_fp16")]; + tensor var_2260_cast_fp16 = mul(x = current_key_23_cast_fp16, y = var_321_cast_fp16)[name = string("op_2260_cast_fp16")]; + tensor key_33_cast_fp16 = add(x = var_2259_cast_fp16, y = var_2260_cast_fp16)[name = string("key_33_cast_fp16")]; + tensor var_2263_cast_fp16 = mul(x = var_132_cast_fp16_5, y = var_323_cast_fp16)[name = string("op_2263_cast_fp16")]; + tensor var_2264_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_321_cast_fp16)[name = string("op_2264_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_2263_cast_fp16, y = var_2264_cast_fp16)[name = string("value_21_cast_fp16")]; + tensor var_2268 = const()[name = string("op_2268"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_21_cast_fp16 = reshape(shape = var_2268, x = key_33_cast_fp16)[name = string("key_heads_21_cast_fp16")]; + tensor var_2270 = const()[name = string("op_2270"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_21_cast_fp16 = reshape(shape = var_2270, x = value_21_cast_fp16)[name = string("value_heads_21_cast_fp16")]; + tensor var_2273_begin_0 = const()[name = string("op_2273_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2273_end_0 = const()[name = string("op_2273_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2273_end_mask_0 = const()[name = string("op_2273_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2273_cast_fp16 = slice_by_index(begin = var_2273_begin_0, end = var_2273_end_0, end_mask = var_2273_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2273_cast_fp16")]; + tensor var_2277_begin_0 = const()[name = string("op_2277_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2277_end_0 = const()[name = string("op_2277_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2277_end_mask_0 = const()[name = string("op_2277_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2277_cast_fp16 = slice_by_index(begin = var_2277_begin_0, end = var_2277_end_0, end_mask = var_2277_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2277_cast_fp16")]; + tensor var_2289_begin_0 = const()[name = string("op_2289_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_2289_end_0 = const()[name = string("op_2289_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_2289_end_mask_0 = const()[name = string("op_2289_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2289_cast_fp16 = slice_by_index(begin = var_2289_begin_0, end = var_2289_end_0, end_mask = var_2289_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2289_cast_fp16")]; + tensor var_2293_begin_0 = const()[name = string("op_2293_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_2293_end_0 = const()[name = string("op_2293_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_2293_end_mask_0 = const()[name = string("op_2293_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2293_cast_fp16 = slice_by_index(begin = var_2293_begin_0, end = var_2293_end_0, end_mask = var_2293_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2293_cast_fp16")]; + tensor var_2305_begin_0 = const()[name = string("op_2305_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_2305_end_0 = const()[name = string("op_2305_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_2305_end_mask_0 = const()[name = string("op_2305_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2305_cast_fp16 = slice_by_index(begin = var_2305_begin_0, end = var_2305_end_0, end_mask = var_2305_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2305_cast_fp16")]; + tensor var_2309_begin_0 = const()[name = string("op_2309_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_2309_end_0 = const()[name = string("op_2309_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_2309_end_mask_0 = const()[name = string("op_2309_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2309_cast_fp16 = slice_by_index(begin = var_2309_begin_0, end = var_2309_end_0, end_mask = var_2309_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2309_cast_fp16")]; + tensor var_2321_begin_0 = const()[name = string("op_2321_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_2321_end_0 = const()[name = string("op_2321_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_2321_end_mask_0 = const()[name = string("op_2321_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2321_cast_fp16 = slice_by_index(begin = var_2321_begin_0, end = var_2321_end_0, end_mask = var_2321_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2321_cast_fp16")]; + tensor var_2325_begin_0 = const()[name = string("op_2325_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_2325_end_0 = const()[name = string("op_2325_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_2325_end_mask_0 = const()[name = string("op_2325_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2325_cast_fp16 = slice_by_index(begin = var_2325_begin_0, end = var_2325_end_0, end_mask = var_2325_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2325_cast_fp16")]; + tensor var_2337_begin_0 = const()[name = string("op_2337_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_2337_end_0 = const()[name = string("op_2337_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_2337_end_mask_0 = const()[name = string("op_2337_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2337_cast_fp16 = slice_by_index(begin = var_2337_begin_0, end = var_2337_end_0, end_mask = var_2337_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2337_cast_fp16")]; + tensor var_2341_begin_0 = const()[name = string("op_2341_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_2341_end_0 = const()[name = string("op_2341_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_2341_end_mask_0 = const()[name = string("op_2341_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2341_cast_fp16 = slice_by_index(begin = var_2341_begin_0, end = var_2341_end_0, end_mask = var_2341_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2341_cast_fp16")]; + tensor var_2353_begin_0 = const()[name = string("op_2353_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_2353_end_0 = const()[name = string("op_2353_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_2353_end_mask_0 = const()[name = string("op_2353_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2353_cast_fp16 = slice_by_index(begin = var_2353_begin_0, end = var_2353_end_0, end_mask = var_2353_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2353_cast_fp16")]; + tensor var_2357_begin_0 = const()[name = string("op_2357_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_2357_end_0 = const()[name = string("op_2357_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_2357_end_mask_0 = const()[name = string("op_2357_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2357_cast_fp16 = slice_by_index(begin = var_2357_begin_0, end = var_2357_end_0, end_mask = var_2357_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2357_cast_fp16")]; + tensor var_2369_begin_0 = const()[name = string("op_2369_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_2369_end_0 = const()[name = string("op_2369_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_2369_end_mask_0 = const()[name = string("op_2369_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2369_cast_fp16 = slice_by_index(begin = var_2369_begin_0, end = var_2369_end_0, end_mask = var_2369_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2369_cast_fp16")]; + tensor var_2373_begin_0 = const()[name = string("op_2373_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_2373_end_0 = const()[name = string("op_2373_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_2373_end_mask_0 = const()[name = string("op_2373_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2373_cast_fp16 = slice_by_index(begin = var_2373_begin_0, end = var_2373_end_0, end_mask = var_2373_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2373_cast_fp16")]; + tensor var_2385_begin_0 = const()[name = string("op_2385_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2385_end_0 = const()[name = string("op_2385_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2385_end_mask_0 = const()[name = string("op_2385_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2385_cast_fp16 = slice_by_index(begin = var_2385_begin_0, end = var_2385_end_0, end_mask = var_2385_end_mask_0, x = key_heads_21_cast_fp16)[name = string("op_2385_cast_fp16")]; + tensor var_2389_begin_0 = const()[name = string("op_2389_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2389_end_0 = const()[name = string("op_2389_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2389_end_mask_0 = const()[name = string("op_2389_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2389_cast_fp16 = slice_by_index(begin = var_2389_begin_0, end = var_2389_end_0, end_mask = var_2389_end_mask_0, x = value_heads_21_cast_fp16)[name = string("op_2389_cast_fp16")]; + bool key_heads_23_interleave_0 = const()[name = string("key_heads_23_interleave_0"), val = bool(false)]; + tensor key_heads_23_cast_fp16 = concat(axis = var_2115, interleave = key_heads_23_interleave_0, values = (var_2273_cast_fp16, var_2273_cast_fp16, var_2289_cast_fp16, var_2289_cast_fp16, var_2305_cast_fp16, var_2305_cast_fp16, var_2321_cast_fp16, var_2321_cast_fp16, var_2337_cast_fp16, var_2337_cast_fp16, var_2353_cast_fp16, var_2353_cast_fp16, var_2369_cast_fp16, var_2369_cast_fp16, var_2385_cast_fp16, var_2385_cast_fp16))[name = string("key_heads_23_cast_fp16")]; + bool value_heads_23_interleave_0 = const()[name = string("value_heads_23_interleave_0"), val = bool(false)]; + tensor value_heads_23_cast_fp16 = concat(axis = var_2115, interleave = value_heads_23_interleave_0, values = (var_2277_cast_fp16, var_2277_cast_fp16, var_2293_cast_fp16, var_2293_cast_fp16, var_2309_cast_fp16, var_2309_cast_fp16, var_2325_cast_fp16, var_2325_cast_fp16, var_2341_cast_fp16, var_2341_cast_fp16, var_2357_cast_fp16, var_2357_cast_fp16, var_2373_cast_fp16, var_2373_cast_fp16, var_2389_cast_fp16, var_2389_cast_fp16))[name = string("value_heads_23_cast_fp16")]; + fp16 var_2412_to_fp16 = const()[name = string("op_2412_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_2413_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2412_to_fp16)[name = string("op_2413_cast_fp16")]; + bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)]; + bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2413_cast_fp16, y = key_heads_23_cast_fp16)[name = string("mh_w_21_cast_fp16")]; + tensor mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_23_cast_fp16")]; + tensor var_2425_cast_fp16 = softmax(axis = var_2097, x = mh_w_23_cast_fp16)[name = string("op_2425_cast_fp16")]; + bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)]; + bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = value_heads_23_cast_fp16, y = var_2425_cast_fp16)[name = string("attn_11_cast_fp16")]; + tensor var_2430 = const()[name = string("op_2430"), val = tensor([1, -1, 1, 1])]; + tensor input_41_cast_fp16 = reshape(shape = var_2430, x = attn_11_cast_fp16)[name = string("input_41_cast_fp16")]; + string obj_51_pad_type_0 = const()[name = string("obj_51_pad_type_0"), val = string("valid")]; + tensor obj_51_strides_0 = const()[name = string("obj_51_strides_0"), val = tensor([1, 1])]; + tensor obj_51_pad_0 = const()[name = string("obj_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_51_dilations_0 = const()[name = string("obj_51_dilations_0"), val = tensor([1, 1])]; + int32 obj_51_groups_0 = const()[name = string("obj_51_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82962432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85059648))))[name = string("layers_5_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_51_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_51_dilations_0, groups = obj_51_groups_0, pad = obj_51_pad_0, pad_type = obj_51_pad_type_0, strides = obj_51_strides_0, weight = layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("obj_51_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_51_cast_fp16)[name = string("inputs_45_cast_fp16")]; + tensor inputs_sq_47_cast_fp16 = mul(x = inputs_45_cast_fp16, y = inputs_45_cast_fp16)[name = string("inputs_sq_47_cast_fp16")]; + tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([1])]; + bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; + tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = inputs_sq_47_cast_fp16)[name = string("variance_47_cast_fp16")]; + fp16 var_2448_to_fp16 = const()[name = string("op_2448_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2449_cast_fp16 = add(x = variance_47_cast_fp16, y = var_2448_to_fp16)[name = string("op_2449_cast_fp16")]; + fp32 var_2450_epsilon_0 = const()[name = string("op_2450_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2450_cast_fp16 = rsqrt(epsilon = var_2450_epsilon_0, x = var_2449_cast_fp16)[name = string("op_2450_cast_fp16")]; + tensor hidden_states_57_cast_fp16 = mul(x = inputs_45_cast_fp16, y = var_2450_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; + tensor w_47_to_fp16 = const()[name = string("w_47_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85060224)))]; + tensor input_43_cast_fp16 = mul(x = w_47_to_fp16, y = hidden_states_57_cast_fp16)[name = string("input_43_cast_fp16")]; + string input_45_pad_type_0 = const()[name = string("input_45_pad_type_0"), val = string("valid")]; + tensor input_45_strides_0 = const()[name = string("input_45_strides_0"), val = tensor([1, 1])]; + tensor input_45_pad_0 = const()[name = string("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_45_dilations_0 = const()[name = string("input_45_dilations_0"), val = tensor([1, 1])]; + int32 input_45_groups_0 = const()[name = string("input_45_groups_0"), val = int32(1)]; + tensor layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85062336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88208128))))[name = string("layers_5_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_45_cast_fp16 = conv(dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("input_45_cast_fp16")]; + tensor var_2464_cast_fp16 = silu(x = input_45_cast_fp16)[name = string("op_2464_cast_fp16")]; + string var_2470_pad_type_0 = const()[name = string("op_2470_pad_type_0"), val = string("valid")]; + tensor var_2470_strides_0 = const()[name = string("op_2470_strides_0"), val = tensor([1, 1])]; + tensor var_2470_pad_0 = const()[name = string("op_2470_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2470_dilations_0 = const()[name = string("op_2470_dilations_0"), val = tensor([1, 1])]; + int32 var_2470_groups_0 = const()[name = string("op_2470_groups_0"), val = int32(1)]; + tensor layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88208704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91354496))))[name = string("layers_5_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_2470_cast_fp16 = conv(dilations = var_2470_dilations_0, groups = var_2470_groups_0, pad = var_2470_pad_0, pad_type = var_2470_pad_type_0, strides = var_2470_strides_0, weight = layers_5_mlp_up_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_2470_cast_fp16")]; + tensor input_47_cast_fp16 = mul(x = var_2464_cast_fp16, y = var_2470_cast_fp16)[name = string("input_47_cast_fp16")]; + string hidden_states_59_pad_type_0 = const()[name = string("hidden_states_59_pad_type_0"), val = string("valid")]; + tensor hidden_states_59_strides_0 = const()[name = string("hidden_states_59_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_59_pad_0 = const()[name = string("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_59_dilations_0 = const()[name = string("hidden_states_59_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_59_groups_0 = const()[name = string("hidden_states_59_groups_0"), val = int32(1)]; + tensor layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91355072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94500864))))[name = string("layers_5_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_59_cast_fp16 = conv(dilations = hidden_states_59_dilations_0, groups = hidden_states_59_groups_0, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = hidden_states_59_strides_0, weight = layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("inputs_47_cast_fp16")]; + int32 var_2484 = const()[name = string("op_2484"), val = int32(3)]; + int32 var_2494 = const()[name = string("op_2494"), val = int32(-2)]; + int32 var_2502 = const()[name = string("op_2502"), val = int32(1)]; + tensor inputs_sq_49_cast_fp16 = mul(x = inputs_47_cast_fp16, y = inputs_47_cast_fp16)[name = string("inputs_sq_49_cast_fp16")]; + tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([1])]; + bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; + tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = inputs_sq_49_cast_fp16)[name = string("variance_49_cast_fp16")]; + fp16 var_2514_to_fp16 = const()[name = string("op_2514_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2515_cast_fp16 = add(x = variance_49_cast_fp16, y = var_2514_to_fp16)[name = string("op_2515_cast_fp16")]; + fp32 var_2516_epsilon_0 = const()[name = string("op_2516_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2516_cast_fp16 = rsqrt(epsilon = var_2516_epsilon_0, x = var_2515_cast_fp16)[name = string("op_2516_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = mul(x = inputs_47_cast_fp16, y = var_2516_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor w_49_to_fp16 = const()[name = string("w_49_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94501440)))]; + tensor obj_53_cast_fp16 = mul(x = w_49_to_fp16, y = hidden_states_61_cast_fp16)[name = string("obj_53_cast_fp16")]; + string query_37_pad_type_0 = const()[name = string("query_37_pad_type_0"), val = string("valid")]; + tensor query_37_strides_0 = const()[name = string("query_37_strides_0"), val = tensor([1, 1])]; + tensor query_37_pad_0 = const()[name = string("query_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_37_dilations_0 = const()[name = string("query_37_dilations_0"), val = tensor([1, 1])]; + int32 query_37_groups_0 = const()[name = string("query_37_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94503552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96600768))))[name = string("layers_6_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_37_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_37_dilations_0, groups = query_37_groups_0, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = query_37_strides_0, weight = layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = string("query_37_cast_fp16")]; + string current_key_25_pad_type_0 = const()[name = string("current_key_25_pad_type_0"), val = string("valid")]; + tensor current_key_25_strides_0 = const()[name = string("current_key_25_strides_0"), val = tensor([1, 1])]; + tensor current_key_25_pad_0 = const()[name = string("current_key_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_25_dilations_0 = const()[name = string("current_key_25_dilations_0"), val = tensor([1, 1])]; + int32 current_key_25_groups_0 = const()[name = string("current_key_25_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96601344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97649984))))[name = string("layers_6_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_25_cast_fp16 = conv(dilations = current_key_25_dilations_0, groups = current_key_25_groups_0, pad = current_key_25_pad_0, pad_type = current_key_25_pad_type_0, strides = current_key_25_strides_0, weight = layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = string("current_key_25_cast_fp16")]; + string current_value_13_pad_type_0 = const()[name = string("current_value_13_pad_type_0"), val = string("valid")]; + tensor current_value_13_strides_0 = const()[name = string("current_value_13_strides_0"), val = tensor([1, 1])]; + tensor current_value_13_pad_0 = const()[name = string("current_value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_13_dilations_0 = const()[name = string("current_value_13_dilations_0"), val = tensor([1, 1])]; + int32 current_value_13_groups_0 = const()[name = string("current_value_13_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97650560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98699200))))[name = string("layers_6_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_13_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_13_dilations_0, groups = current_value_13_groups_0, pad = current_value_13_pad_0, pad_type = current_value_13_pad_type_0, strides = current_value_13_strides_0, weight = layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = string("current_value_13_cast_fp16")]; + tensor var_2553 = const()[name = string("op_2553"), val = tensor([16, 128, 1, 1])]; + tensor inputs_49_cast_fp16 = reshape(shape = var_2553, x = query_37_cast_fp16)[name = string("inputs_49_cast_fp16")]; + tensor inputs_sq_51_cast_fp16 = mul(x = inputs_49_cast_fp16, y = inputs_49_cast_fp16)[name = string("inputs_sq_51_cast_fp16")]; + tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([1])]; + bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; + tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = inputs_sq_51_cast_fp16)[name = string("variance_51_cast_fp16")]; + fp16 var_2559_to_fp16 = const()[name = string("op_2559_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2560_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2559_to_fp16)[name = string("op_2560_cast_fp16")]; + fp32 var_2561_epsilon_0 = const()[name = string("op_2561_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2561_cast_fp16 = rsqrt(epsilon = var_2561_epsilon_0, x = var_2560_cast_fp16)[name = string("op_2561_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = mul(x = inputs_49_cast_fp16, y = var_2561_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + tensor w_51_to_fp16 = const()[name = string("w_51_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98699776)))]; + tensor query_normed_13_cast_fp16 = mul(x = w_51_to_fp16, y = hidden_states_63_cast_fp16)[name = string("query_normed_13_cast_fp16")]; + tensor var_2569 = const()[name = string("op_2569"), val = tensor([8, 128, 1, 1])]; + tensor inputs_51_cast_fp16 = reshape(shape = var_2569, x = current_key_25_cast_fp16)[name = string("inputs_51_cast_fp16")]; + tensor inputs_sq_53_cast_fp16 = mul(x = inputs_51_cast_fp16, y = inputs_51_cast_fp16)[name = string("inputs_sq_53_cast_fp16")]; + tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([1])]; + bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; + tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = inputs_sq_53_cast_fp16)[name = string("variance_53_cast_fp16")]; + fp16 var_2575_to_fp16 = const()[name = string("op_2575_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2576_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2575_to_fp16)[name = string("op_2576_cast_fp16")]; + fp32 var_2577_epsilon_0 = const()[name = string("op_2577_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2577_cast_fp16 = rsqrt(epsilon = var_2577_epsilon_0, x = var_2576_cast_fp16)[name = string("op_2577_cast_fp16")]; + tensor hidden_states_65_cast_fp16 = mul(x = inputs_51_cast_fp16, y = var_2577_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; + tensor w_53_to_fp16 = const()[name = string("w_53_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98700096)))]; + tensor current_key_normed_13_cast_fp16 = mul(x = w_53_to_fp16, y = hidden_states_65_cast_fp16)[name = string("current_key_normed_13_cast_fp16")]; + tensor var_2595 = const()[name = string("op_2595"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_2595, x = query_normed_13_cast_fp16)[name = string("mh_q_37_cast_fp16")]; + tensor var_2597 = const()[name = string("op_2597"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_25_cast_fp16 = reshape(shape = var_2597, x = current_key_normed_13_cast_fp16)[name = string("mh_k_25_cast_fp16")]; + tensor var_2601_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2601_cast_fp16")]; + tensor var_2606_begin_0 = const()[name = string("op_2606_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2606_end_0 = const()[name = string("op_2606_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_2606_end_mask_0 = const()[name = string("op_2606_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2606_cast_fp16 = slice_by_index(begin = var_2606_begin_0, end = var_2606_end_0, end_mask = var_2606_end_mask_0, x = mh_q_37_cast_fp16)[name = string("op_2606_cast_fp16")]; + tensor var_2612_begin_0 = const()[name = string("op_2612_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2612_end_0 = const()[name = string("op_2612_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_2612_end_mask_0 = const()[name = string("op_2612_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2612_cast_fp16 = slice_by_index(begin = var_2612_begin_0, end = var_2612_end_0, end_mask = var_2612_end_mask_0, x = mh_q_37_cast_fp16)[name = string("op_2612_cast_fp16")]; + fp16 const_155_promoted_to_fp16 = const()[name = string("const_155_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2614_cast_fp16 = mul(x = var_2612_cast_fp16, y = const_155_promoted_to_fp16)[name = string("op_2614_cast_fp16")]; + bool var_2616_interleave_0 = const()[name = string("op_2616_interleave_0"), val = bool(false)]; + tensor var_2616_cast_fp16 = concat(axis = var_2494, interleave = var_2616_interleave_0, values = (var_2614_cast_fp16, var_2606_cast_fp16))[name = string("op_2616_cast_fp16")]; + tensor var_2617_cast_fp16 = mul(x = var_2616_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2617_cast_fp16")]; + tensor mh_q_39_cast_fp16 = add(x = var_2601_cast_fp16, y = var_2617_cast_fp16)[name = string("mh_q_39_cast_fp16")]; + tensor var_2619_cast_fp16 = mul(x = mh_k_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2619_cast_fp16")]; + tensor var_2624_begin_0 = const()[name = string("op_2624_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2624_end_0 = const()[name = string("op_2624_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_2624_end_mask_0 = const()[name = string("op_2624_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2624_cast_fp16 = slice_by_index(begin = var_2624_begin_0, end = var_2624_end_0, end_mask = var_2624_end_mask_0, x = mh_k_25_cast_fp16)[name = string("op_2624_cast_fp16")]; + tensor var_2630_begin_0 = const()[name = string("op_2630_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2630_end_0 = const()[name = string("op_2630_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_2630_end_mask_0 = const()[name = string("op_2630_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2630_cast_fp16 = slice_by_index(begin = var_2630_begin_0, end = var_2630_end_0, end_mask = var_2630_end_mask_0, x = mh_k_25_cast_fp16)[name = string("op_2630_cast_fp16")]; + fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2632_cast_fp16 = mul(x = var_2630_cast_fp16, y = const_158_promoted_to_fp16)[name = string("op_2632_cast_fp16")]; + bool var_2634_interleave_0 = const()[name = string("op_2634_interleave_0"), val = bool(false)]; + tensor var_2634_cast_fp16 = concat(axis = var_2494, interleave = var_2634_interleave_0, values = (var_2632_cast_fp16, var_2624_cast_fp16))[name = string("op_2634_cast_fp16")]; + tensor var_2635_cast_fp16 = mul(x = var_2634_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2635_cast_fp16")]; + tensor mh_k_27_cast_fp16 = add(x = var_2619_cast_fp16, y = var_2635_cast_fp16)[name = string("mh_k_27_cast_fp16")]; + tensor var_2639 = const()[name = string("op_2639"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_27_cast_fp16 = reshape(shape = var_2639, x = mh_k_27_cast_fp16)[name = string("current_key_27_cast_fp16")]; + tensor var_2646_cast_fp16 = mul(x = var_101_cast_fp16_6, y = var_323_cast_fp16)[name = string("op_2646_cast_fp16")]; + tensor var_2647_cast_fp16 = mul(x = current_key_27_cast_fp16, y = var_321_cast_fp16)[name = string("op_2647_cast_fp16")]; + tensor key_39_cast_fp16 = add(x = var_2646_cast_fp16, y = var_2647_cast_fp16)[name = string("key_39_cast_fp16")]; + tensor var_2650_cast_fp16 = mul(x = var_132_cast_fp16_6, y = var_323_cast_fp16)[name = string("op_2650_cast_fp16")]; + tensor var_2651_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_321_cast_fp16)[name = string("op_2651_cast_fp16")]; + tensor value_25_cast_fp16 = add(x = var_2650_cast_fp16, y = var_2651_cast_fp16)[name = string("value_25_cast_fp16")]; + tensor var_2655 = const()[name = string("op_2655"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_25_cast_fp16 = reshape(shape = var_2655, x = key_39_cast_fp16)[name = string("key_heads_25_cast_fp16")]; + tensor var_2657 = const()[name = string("op_2657"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_25_cast_fp16 = reshape(shape = var_2657, x = value_25_cast_fp16)[name = string("value_heads_25_cast_fp16")]; + tensor var_2660_begin_0 = const()[name = string("op_2660_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2660_end_0 = const()[name = string("op_2660_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2660_end_mask_0 = const()[name = string("op_2660_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2660_cast_fp16 = slice_by_index(begin = var_2660_begin_0, end = var_2660_end_0, end_mask = var_2660_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2660_cast_fp16")]; + tensor var_2664_begin_0 = const()[name = string("op_2664_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2664_end_0 = const()[name = string("op_2664_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2664_end_mask_0 = const()[name = string("op_2664_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2664_cast_fp16 = slice_by_index(begin = var_2664_begin_0, end = var_2664_end_0, end_mask = var_2664_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2664_cast_fp16")]; + tensor var_2676_begin_0 = const()[name = string("op_2676_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_2676_end_0 = const()[name = string("op_2676_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_2676_end_mask_0 = const()[name = string("op_2676_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2676_cast_fp16 = slice_by_index(begin = var_2676_begin_0, end = var_2676_end_0, end_mask = var_2676_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2676_cast_fp16")]; + tensor var_2680_begin_0 = const()[name = string("op_2680_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_2680_end_0 = const()[name = string("op_2680_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_2680_end_mask_0 = const()[name = string("op_2680_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2680_cast_fp16 = slice_by_index(begin = var_2680_begin_0, end = var_2680_end_0, end_mask = var_2680_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2680_cast_fp16")]; + tensor var_2692_begin_0 = const()[name = string("op_2692_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_2692_end_0 = const()[name = string("op_2692_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_2692_end_mask_0 = const()[name = string("op_2692_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2692_cast_fp16 = slice_by_index(begin = var_2692_begin_0, end = var_2692_end_0, end_mask = var_2692_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2692_cast_fp16")]; + tensor var_2696_begin_0 = const()[name = string("op_2696_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_2696_end_0 = const()[name = string("op_2696_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_2696_end_mask_0 = const()[name = string("op_2696_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2696_cast_fp16 = slice_by_index(begin = var_2696_begin_0, end = var_2696_end_0, end_mask = var_2696_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2696_cast_fp16")]; + tensor var_2708_begin_0 = const()[name = string("op_2708_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_2708_end_0 = const()[name = string("op_2708_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_2708_end_mask_0 = const()[name = string("op_2708_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2708_cast_fp16 = slice_by_index(begin = var_2708_begin_0, end = var_2708_end_0, end_mask = var_2708_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2708_cast_fp16")]; + tensor var_2712_begin_0 = const()[name = string("op_2712_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_2712_end_0 = const()[name = string("op_2712_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_2712_end_mask_0 = const()[name = string("op_2712_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2712_cast_fp16 = slice_by_index(begin = var_2712_begin_0, end = var_2712_end_0, end_mask = var_2712_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2712_cast_fp16")]; + tensor var_2724_begin_0 = const()[name = string("op_2724_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_2724_end_0 = const()[name = string("op_2724_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_2724_end_mask_0 = const()[name = string("op_2724_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2724_cast_fp16 = slice_by_index(begin = var_2724_begin_0, end = var_2724_end_0, end_mask = var_2724_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2724_cast_fp16")]; + tensor var_2728_begin_0 = const()[name = string("op_2728_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_2728_end_0 = const()[name = string("op_2728_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_2728_end_mask_0 = const()[name = string("op_2728_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2728_cast_fp16 = slice_by_index(begin = var_2728_begin_0, end = var_2728_end_0, end_mask = var_2728_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2728_cast_fp16")]; + tensor var_2740_begin_0 = const()[name = string("op_2740_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_2740_end_0 = const()[name = string("op_2740_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_2740_end_mask_0 = const()[name = string("op_2740_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2740_cast_fp16 = slice_by_index(begin = var_2740_begin_0, end = var_2740_end_0, end_mask = var_2740_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2740_cast_fp16")]; + tensor var_2744_begin_0 = const()[name = string("op_2744_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_2744_end_0 = const()[name = string("op_2744_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_2744_end_mask_0 = const()[name = string("op_2744_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2744_cast_fp16 = slice_by_index(begin = var_2744_begin_0, end = var_2744_end_0, end_mask = var_2744_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2744_cast_fp16")]; + tensor var_2756_begin_0 = const()[name = string("op_2756_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_2756_end_0 = const()[name = string("op_2756_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_2756_end_mask_0 = const()[name = string("op_2756_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2756_cast_fp16 = slice_by_index(begin = var_2756_begin_0, end = var_2756_end_0, end_mask = var_2756_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2756_cast_fp16")]; + tensor var_2760_begin_0 = const()[name = string("op_2760_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_2760_end_0 = const()[name = string("op_2760_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_2760_end_mask_0 = const()[name = string("op_2760_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2760_cast_fp16 = slice_by_index(begin = var_2760_begin_0, end = var_2760_end_0, end_mask = var_2760_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2760_cast_fp16")]; + tensor var_2772_begin_0 = const()[name = string("op_2772_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2772_end_0 = const()[name = string("op_2772_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2772_end_mask_0 = const()[name = string("op_2772_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2772_cast_fp16 = slice_by_index(begin = var_2772_begin_0, end = var_2772_end_0, end_mask = var_2772_end_mask_0, x = key_heads_25_cast_fp16)[name = string("op_2772_cast_fp16")]; + tensor var_2776_begin_0 = const()[name = string("op_2776_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2776_end_0 = const()[name = string("op_2776_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_2776_end_mask_0 = const()[name = string("op_2776_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2776_cast_fp16 = slice_by_index(begin = var_2776_begin_0, end = var_2776_end_0, end_mask = var_2776_end_mask_0, x = value_heads_25_cast_fp16)[name = string("op_2776_cast_fp16")]; + bool key_heads_27_interleave_0 = const()[name = string("key_heads_27_interleave_0"), val = bool(false)]; + tensor key_heads_27_cast_fp16 = concat(axis = var_2502, interleave = key_heads_27_interleave_0, values = (var_2660_cast_fp16, var_2660_cast_fp16, var_2676_cast_fp16, var_2676_cast_fp16, var_2692_cast_fp16, var_2692_cast_fp16, var_2708_cast_fp16, var_2708_cast_fp16, var_2724_cast_fp16, var_2724_cast_fp16, var_2740_cast_fp16, var_2740_cast_fp16, var_2756_cast_fp16, var_2756_cast_fp16, var_2772_cast_fp16, var_2772_cast_fp16))[name = string("key_heads_27_cast_fp16")]; + bool value_heads_27_interleave_0 = const()[name = string("value_heads_27_interleave_0"), val = bool(false)]; + tensor value_heads_27_cast_fp16 = concat(axis = var_2502, interleave = value_heads_27_interleave_0, values = (var_2664_cast_fp16, var_2664_cast_fp16, var_2680_cast_fp16, var_2680_cast_fp16, var_2696_cast_fp16, var_2696_cast_fp16, var_2712_cast_fp16, var_2712_cast_fp16, var_2728_cast_fp16, var_2728_cast_fp16, var_2744_cast_fp16, var_2744_cast_fp16, var_2760_cast_fp16, var_2760_cast_fp16, var_2776_cast_fp16, var_2776_cast_fp16))[name = string("value_heads_27_cast_fp16")]; + fp16 var_2799_to_fp16 = const()[name = string("op_2799_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_2800_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2799_to_fp16)[name = string("op_2800_cast_fp16")]; + bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)]; + bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_2800_cast_fp16, y = key_heads_27_cast_fp16)[name = string("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_27_cast_fp16")]; + tensor var_2812_cast_fp16 = softmax(axis = var_2484, x = mh_w_27_cast_fp16)[name = string("op_2812_cast_fp16")]; + bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)]; + bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = value_heads_27_cast_fp16, y = var_2812_cast_fp16)[name = string("attn_13_cast_fp16")]; + tensor var_2817 = const()[name = string("op_2817"), val = tensor([1, -1, 1, 1])]; + tensor input_49_cast_fp16 = reshape(shape = var_2817, x = attn_13_cast_fp16)[name = string("input_49_cast_fp16")]; + string obj_59_pad_type_0 = const()[name = string("obj_59_pad_type_0"), val = string("valid")]; + tensor obj_59_strides_0 = const()[name = string("obj_59_strides_0"), val = tensor([1, 1])]; + tensor obj_59_pad_0 = const()[name = string("obj_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_59_dilations_0 = const()[name = string("obj_59_dilations_0"), val = tensor([1, 1])]; + int32 obj_59_groups_0 = const()[name = string("obj_59_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98700416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100797632))))[name = string("layers_6_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_59_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_59_dilations_0, groups = obj_59_groups_0, pad = obj_59_pad_0, pad_type = obj_59_pad_type_0, strides = obj_59_strides_0, weight = layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("obj_59_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_47_cast_fp16, y = obj_59_cast_fp16)[name = string("inputs_53_cast_fp16")]; + tensor inputs_sq_55_cast_fp16 = mul(x = inputs_53_cast_fp16, y = inputs_53_cast_fp16)[name = string("inputs_sq_55_cast_fp16")]; + tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([1])]; + bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; + tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = inputs_sq_55_cast_fp16)[name = string("variance_55_cast_fp16")]; + fp16 var_2835_to_fp16 = const()[name = string("op_2835_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2836_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2835_to_fp16)[name = string("op_2836_cast_fp16")]; + fp32 var_2837_epsilon_0 = const()[name = string("op_2837_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2837_cast_fp16 = rsqrt(epsilon = var_2837_epsilon_0, x = var_2836_cast_fp16)[name = string("op_2837_cast_fp16")]; + tensor hidden_states_67_cast_fp16 = mul(x = inputs_53_cast_fp16, y = var_2837_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor w_55_to_fp16 = const()[name = string("w_55_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100798208)))]; + tensor input_51_cast_fp16 = mul(x = w_55_to_fp16, y = hidden_states_67_cast_fp16)[name = string("input_51_cast_fp16")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100800320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103946112))))[name = string("layers_6_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_53_cast_fp16 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("input_53_cast_fp16")]; + tensor var_2851_cast_fp16 = silu(x = input_53_cast_fp16)[name = string("op_2851_cast_fp16")]; + string var_2857_pad_type_0 = const()[name = string("op_2857_pad_type_0"), val = string("valid")]; + tensor var_2857_strides_0 = const()[name = string("op_2857_strides_0"), val = tensor([1, 1])]; + tensor var_2857_pad_0 = const()[name = string("op_2857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2857_dilations_0 = const()[name = string("op_2857_dilations_0"), val = tensor([1, 1])]; + int32 var_2857_groups_0 = const()[name = string("op_2857_groups_0"), val = int32(1)]; + tensor layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103946688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107092480))))[name = string("layers_6_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_2857_cast_fp16 = conv(dilations = var_2857_dilations_0, groups = var_2857_groups_0, pad = var_2857_pad_0, pad_type = var_2857_pad_type_0, strides = var_2857_strides_0, weight = layers_6_mlp_up_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_2857_cast_fp16")]; + tensor input_55_cast_fp16 = mul(x = var_2851_cast_fp16, y = var_2857_cast_fp16)[name = string("input_55_cast_fp16")]; + string hidden_states_69_pad_type_0 = const()[name = string("hidden_states_69_pad_type_0"), val = string("valid")]; + tensor hidden_states_69_strides_0 = const()[name = string("hidden_states_69_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_69_pad_0 = const()[name = string("hidden_states_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_69_dilations_0 = const()[name = string("hidden_states_69_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_69_groups_0 = const()[name = string("hidden_states_69_groups_0"), val = int32(1)]; + tensor layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107093056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110238848))))[name = string("layers_6_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_69_cast_fp16 = conv(dilations = hidden_states_69_dilations_0, groups = hidden_states_69_groups_0, pad = hidden_states_69_pad_0, pad_type = hidden_states_69_pad_type_0, strides = hidden_states_69_strides_0, weight = layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("inputs_55_cast_fp16")]; + int32 var_2871 = const()[name = string("op_2871"), val = int32(3)]; + int32 var_2881 = const()[name = string("op_2881"), val = int32(-2)]; + int32 var_2889 = const()[name = string("op_2889"), val = int32(1)]; + tensor inputs_sq_57_cast_fp16 = mul(x = inputs_55_cast_fp16, y = inputs_55_cast_fp16)[name = string("inputs_sq_57_cast_fp16")]; + tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([1])]; + bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; + tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = inputs_sq_57_cast_fp16)[name = string("variance_57_cast_fp16")]; + fp16 var_2901_to_fp16 = const()[name = string("op_2901_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2902_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2901_to_fp16)[name = string("op_2902_cast_fp16")]; + fp32 var_2903_epsilon_0 = const()[name = string("op_2903_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2903_cast_fp16 = rsqrt(epsilon = var_2903_epsilon_0, x = var_2902_cast_fp16)[name = string("op_2903_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = mul(x = inputs_55_cast_fp16, y = var_2903_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor w_57_to_fp16 = const()[name = string("w_57_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110239424)))]; + tensor obj_61_cast_fp16 = mul(x = w_57_to_fp16, y = hidden_states_71_cast_fp16)[name = string("obj_61_cast_fp16")]; + string query_43_pad_type_0 = const()[name = string("query_43_pad_type_0"), val = string("valid")]; + tensor query_43_strides_0 = const()[name = string("query_43_strides_0"), val = tensor([1, 1])]; + tensor query_43_pad_0 = const()[name = string("query_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_43_dilations_0 = const()[name = string("query_43_dilations_0"), val = tensor([1, 1])]; + int32 query_43_groups_0 = const()[name = string("query_43_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110241536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112338752))))[name = string("layers_7_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_43_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_43_dilations_0, groups = query_43_groups_0, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = query_43_strides_0, weight = layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("query_43_cast_fp16")]; + string current_key_29_pad_type_0 = const()[name = string("current_key_29_pad_type_0"), val = string("valid")]; + tensor current_key_29_strides_0 = const()[name = string("current_key_29_strides_0"), val = tensor([1, 1])]; + tensor current_key_29_pad_0 = const()[name = string("current_key_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_29_dilations_0 = const()[name = string("current_key_29_dilations_0"), val = tensor([1, 1])]; + int32 current_key_29_groups_0 = const()[name = string("current_key_29_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112339328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113387968))))[name = string("layers_7_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_29_cast_fp16 = conv(dilations = current_key_29_dilations_0, groups = current_key_29_groups_0, pad = current_key_29_pad_0, pad_type = current_key_29_pad_type_0, strides = current_key_29_strides_0, weight = layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("current_key_29_cast_fp16")]; + string current_value_15_pad_type_0 = const()[name = string("current_value_15_pad_type_0"), val = string("valid")]; + tensor current_value_15_strides_0 = const()[name = string("current_value_15_strides_0"), val = tensor([1, 1])]; + tensor current_value_15_pad_0 = const()[name = string("current_value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_15_dilations_0 = const()[name = string("current_value_15_dilations_0"), val = tensor([1, 1])]; + int32 current_value_15_groups_0 = const()[name = string("current_value_15_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113388544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114437184))))[name = string("layers_7_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_15_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_15_dilations_0, groups = current_value_15_groups_0, pad = current_value_15_pad_0, pad_type = current_value_15_pad_type_0, strides = current_value_15_strides_0, weight = layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("current_value_15_cast_fp16")]; + tensor var_2940 = const()[name = string("op_2940"), val = tensor([16, 128, 1, 1])]; + tensor inputs_57_cast_fp16 = reshape(shape = var_2940, x = query_43_cast_fp16)[name = string("inputs_57_cast_fp16")]; + tensor inputs_sq_59_cast_fp16 = mul(x = inputs_57_cast_fp16, y = inputs_57_cast_fp16)[name = string("inputs_sq_59_cast_fp16")]; + tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([1])]; + bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; + tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = inputs_sq_59_cast_fp16)[name = string("variance_59_cast_fp16")]; + fp16 var_2946_to_fp16 = const()[name = string("op_2946_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2947_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2946_to_fp16)[name = string("op_2947_cast_fp16")]; + fp32 var_2948_epsilon_0 = const()[name = string("op_2948_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2948_cast_fp16 = rsqrt(epsilon = var_2948_epsilon_0, x = var_2947_cast_fp16)[name = string("op_2948_cast_fp16")]; + tensor hidden_states_73_cast_fp16 = mul(x = inputs_57_cast_fp16, y = var_2948_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; + tensor w_59_to_fp16 = const()[name = string("w_59_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114437760)))]; + tensor query_normed_15_cast_fp16 = mul(x = w_59_to_fp16, y = hidden_states_73_cast_fp16)[name = string("query_normed_15_cast_fp16")]; + tensor var_2956 = const()[name = string("op_2956"), val = tensor([8, 128, 1, 1])]; + tensor inputs_59_cast_fp16 = reshape(shape = var_2956, x = current_key_29_cast_fp16)[name = string("inputs_59_cast_fp16")]; + tensor inputs_sq_61_cast_fp16 = mul(x = inputs_59_cast_fp16, y = inputs_59_cast_fp16)[name = string("inputs_sq_61_cast_fp16")]; + tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([1])]; + bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; + tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = inputs_sq_61_cast_fp16)[name = string("variance_61_cast_fp16")]; + fp16 var_2962_to_fp16 = const()[name = string("op_2962_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2963_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2962_to_fp16)[name = string("op_2963_cast_fp16")]; + fp32 var_2964_epsilon_0 = const()[name = string("op_2964_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2964_cast_fp16 = rsqrt(epsilon = var_2964_epsilon_0, x = var_2963_cast_fp16)[name = string("op_2964_cast_fp16")]; + tensor hidden_states_75_cast_fp16 = mul(x = inputs_59_cast_fp16, y = var_2964_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; + tensor w_61_to_fp16 = const()[name = string("w_61_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114438080)))]; + tensor current_key_normed_15_cast_fp16 = mul(x = w_61_to_fp16, y = hidden_states_75_cast_fp16)[name = string("current_key_normed_15_cast_fp16")]; + tensor var_2982 = const()[name = string("op_2982"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_2982, x = query_normed_15_cast_fp16)[name = string("mh_q_43_cast_fp16")]; + tensor var_2984 = const()[name = string("op_2984"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_29_cast_fp16 = reshape(shape = var_2984, x = current_key_normed_15_cast_fp16)[name = string("mh_k_29_cast_fp16")]; + tensor var_2988_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2988_cast_fp16")]; + tensor var_2993_begin_0 = const()[name = string("op_2993_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2993_end_0 = const()[name = string("op_2993_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_2993_end_mask_0 = const()[name = string("op_2993_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2993_cast_fp16 = slice_by_index(begin = var_2993_begin_0, end = var_2993_end_0, end_mask = var_2993_end_mask_0, x = mh_q_43_cast_fp16)[name = string("op_2993_cast_fp16")]; + tensor var_2999_begin_0 = const()[name = string("op_2999_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2999_end_0 = const()[name = string("op_2999_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_2999_end_mask_0 = const()[name = string("op_2999_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2999_cast_fp16 = slice_by_index(begin = var_2999_begin_0, end = var_2999_end_0, end_mask = var_2999_end_mask_0, x = mh_q_43_cast_fp16)[name = string("op_2999_cast_fp16")]; + fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3001_cast_fp16 = mul(x = var_2999_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_3001_cast_fp16")]; + bool var_3003_interleave_0 = const()[name = string("op_3003_interleave_0"), val = bool(false)]; + tensor var_3003_cast_fp16 = concat(axis = var_2881, interleave = var_3003_interleave_0, values = (var_3001_cast_fp16, var_2993_cast_fp16))[name = string("op_3003_cast_fp16")]; + tensor var_3004_cast_fp16 = mul(x = var_3003_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3004_cast_fp16")]; + tensor mh_q_45_cast_fp16 = add(x = var_2988_cast_fp16, y = var_3004_cast_fp16)[name = string("mh_q_45_cast_fp16")]; + tensor var_3006_cast_fp16 = mul(x = mh_k_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3006_cast_fp16")]; + tensor var_3011_begin_0 = const()[name = string("op_3011_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3011_end_0 = const()[name = string("op_3011_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_3011_end_mask_0 = const()[name = string("op_3011_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3011_cast_fp16 = slice_by_index(begin = var_3011_begin_0, end = var_3011_end_0, end_mask = var_3011_end_mask_0, x = mh_k_29_cast_fp16)[name = string("op_3011_cast_fp16")]; + tensor var_3017_begin_0 = const()[name = string("op_3017_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3017_end_0 = const()[name = string("op_3017_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_3017_end_mask_0 = const()[name = string("op_3017_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3017_cast_fp16 = slice_by_index(begin = var_3017_begin_0, end = var_3017_end_0, end_mask = var_3017_end_mask_0, x = mh_k_29_cast_fp16)[name = string("op_3017_cast_fp16")]; + fp16 const_181_promoted_to_fp16 = const()[name = string("const_181_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3019_cast_fp16 = mul(x = var_3017_cast_fp16, y = const_181_promoted_to_fp16)[name = string("op_3019_cast_fp16")]; + bool var_3021_interleave_0 = const()[name = string("op_3021_interleave_0"), val = bool(false)]; + tensor var_3021_cast_fp16 = concat(axis = var_2881, interleave = var_3021_interleave_0, values = (var_3019_cast_fp16, var_3011_cast_fp16))[name = string("op_3021_cast_fp16")]; + tensor var_3022_cast_fp16 = mul(x = var_3021_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3022_cast_fp16")]; + tensor mh_k_31_cast_fp16 = add(x = var_3006_cast_fp16, y = var_3022_cast_fp16)[name = string("mh_k_31_cast_fp16")]; + tensor var_3026 = const()[name = string("op_3026"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_31_cast_fp16 = reshape(shape = var_3026, x = mh_k_31_cast_fp16)[name = string("current_key_31_cast_fp16")]; + tensor var_3033_cast_fp16 = mul(x = var_101_cast_fp16_7, y = var_323_cast_fp16)[name = string("op_3033_cast_fp16")]; + tensor var_3034_cast_fp16 = mul(x = current_key_31_cast_fp16, y = var_321_cast_fp16)[name = string("op_3034_cast_fp16")]; + tensor key_45_cast_fp16 = add(x = var_3033_cast_fp16, y = var_3034_cast_fp16)[name = string("key_45_cast_fp16")]; + tensor var_3037_cast_fp16 = mul(x = var_132_cast_fp16_7, y = var_323_cast_fp16)[name = string("op_3037_cast_fp16")]; + tensor var_3038_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_321_cast_fp16)[name = string("op_3038_cast_fp16")]; + tensor value_29_cast_fp16 = add(x = var_3037_cast_fp16, y = var_3038_cast_fp16)[name = string("value_29_cast_fp16")]; + tensor var_3042 = const()[name = string("op_3042"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_29_cast_fp16 = reshape(shape = var_3042, x = key_45_cast_fp16)[name = string("key_heads_29_cast_fp16")]; + tensor var_3044 = const()[name = string("op_3044"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_29_cast_fp16 = reshape(shape = var_3044, x = value_29_cast_fp16)[name = string("value_heads_29_cast_fp16")]; + tensor var_3047_begin_0 = const()[name = string("op_3047_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3047_end_0 = const()[name = string("op_3047_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3047_end_mask_0 = const()[name = string("op_3047_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3047_cast_fp16 = slice_by_index(begin = var_3047_begin_0, end = var_3047_end_0, end_mask = var_3047_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3047_cast_fp16")]; + tensor var_3051_begin_0 = const()[name = string("op_3051_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3051_end_0 = const()[name = string("op_3051_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3051_end_mask_0 = const()[name = string("op_3051_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3051_cast_fp16 = slice_by_index(begin = var_3051_begin_0, end = var_3051_end_0, end_mask = var_3051_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3051_cast_fp16")]; + tensor var_3063_begin_0 = const()[name = string("op_3063_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_3063_end_0 = const()[name = string("op_3063_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_3063_end_mask_0 = const()[name = string("op_3063_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3063_cast_fp16 = slice_by_index(begin = var_3063_begin_0, end = var_3063_end_0, end_mask = var_3063_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3063_cast_fp16")]; + tensor var_3067_begin_0 = const()[name = string("op_3067_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_3067_end_0 = const()[name = string("op_3067_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_3067_end_mask_0 = const()[name = string("op_3067_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3067_cast_fp16 = slice_by_index(begin = var_3067_begin_0, end = var_3067_end_0, end_mask = var_3067_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3067_cast_fp16")]; + tensor var_3079_begin_0 = const()[name = string("op_3079_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3079_end_0 = const()[name = string("op_3079_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_3079_end_mask_0 = const()[name = string("op_3079_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3079_cast_fp16 = slice_by_index(begin = var_3079_begin_0, end = var_3079_end_0, end_mask = var_3079_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3079_cast_fp16")]; + tensor var_3083_begin_0 = const()[name = string("op_3083_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3083_end_0 = const()[name = string("op_3083_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_3083_end_mask_0 = const()[name = string("op_3083_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3083_cast_fp16 = slice_by_index(begin = var_3083_begin_0, end = var_3083_end_0, end_mask = var_3083_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3083_cast_fp16")]; + tensor var_3095_begin_0 = const()[name = string("op_3095_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3095_end_0 = const()[name = string("op_3095_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_3095_end_mask_0 = const()[name = string("op_3095_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3095_cast_fp16 = slice_by_index(begin = var_3095_begin_0, end = var_3095_end_0, end_mask = var_3095_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3095_cast_fp16")]; + tensor var_3099_begin_0 = const()[name = string("op_3099_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3099_end_0 = const()[name = string("op_3099_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_3099_end_mask_0 = const()[name = string("op_3099_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3099_cast_fp16 = slice_by_index(begin = var_3099_begin_0, end = var_3099_end_0, end_mask = var_3099_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3099_cast_fp16")]; + tensor var_3111_begin_0 = const()[name = string("op_3111_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3111_end_0 = const()[name = string("op_3111_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_3111_end_mask_0 = const()[name = string("op_3111_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3111_cast_fp16 = slice_by_index(begin = var_3111_begin_0, end = var_3111_end_0, end_mask = var_3111_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3111_cast_fp16")]; + tensor var_3115_begin_0 = const()[name = string("op_3115_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3115_end_0 = const()[name = string("op_3115_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_3115_end_mask_0 = const()[name = string("op_3115_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3115_cast_fp16 = slice_by_index(begin = var_3115_begin_0, end = var_3115_end_0, end_mask = var_3115_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3115_cast_fp16")]; + tensor var_3127_begin_0 = const()[name = string("op_3127_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3127_end_0 = const()[name = string("op_3127_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_3127_end_mask_0 = const()[name = string("op_3127_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3127_cast_fp16 = slice_by_index(begin = var_3127_begin_0, end = var_3127_end_0, end_mask = var_3127_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3127_cast_fp16")]; + tensor var_3131_begin_0 = const()[name = string("op_3131_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3131_end_0 = const()[name = string("op_3131_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_3131_end_mask_0 = const()[name = string("op_3131_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3131_cast_fp16 = slice_by_index(begin = var_3131_begin_0, end = var_3131_end_0, end_mask = var_3131_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3131_cast_fp16")]; + tensor var_3143_begin_0 = const()[name = string("op_3143_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3143_end_0 = const()[name = string("op_3143_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_3143_end_mask_0 = const()[name = string("op_3143_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3143_cast_fp16 = slice_by_index(begin = var_3143_begin_0, end = var_3143_end_0, end_mask = var_3143_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3143_cast_fp16")]; + tensor var_3147_begin_0 = const()[name = string("op_3147_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3147_end_0 = const()[name = string("op_3147_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_3147_end_mask_0 = const()[name = string("op_3147_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3147_cast_fp16 = slice_by_index(begin = var_3147_begin_0, end = var_3147_end_0, end_mask = var_3147_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3147_cast_fp16")]; + tensor var_3159_begin_0 = const()[name = string("op_3159_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3159_end_0 = const()[name = string("op_3159_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3159_end_mask_0 = const()[name = string("op_3159_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3159_cast_fp16 = slice_by_index(begin = var_3159_begin_0, end = var_3159_end_0, end_mask = var_3159_end_mask_0, x = key_heads_29_cast_fp16)[name = string("op_3159_cast_fp16")]; + tensor var_3163_begin_0 = const()[name = string("op_3163_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3163_end_0 = const()[name = string("op_3163_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3163_end_mask_0 = const()[name = string("op_3163_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3163_cast_fp16 = slice_by_index(begin = var_3163_begin_0, end = var_3163_end_0, end_mask = var_3163_end_mask_0, x = value_heads_29_cast_fp16)[name = string("op_3163_cast_fp16")]; + bool key_heads_31_interleave_0 = const()[name = string("key_heads_31_interleave_0"), val = bool(false)]; + tensor key_heads_31_cast_fp16 = concat(axis = var_2889, interleave = key_heads_31_interleave_0, values = (var_3047_cast_fp16, var_3047_cast_fp16, var_3063_cast_fp16, var_3063_cast_fp16, var_3079_cast_fp16, var_3079_cast_fp16, var_3095_cast_fp16, var_3095_cast_fp16, var_3111_cast_fp16, var_3111_cast_fp16, var_3127_cast_fp16, var_3127_cast_fp16, var_3143_cast_fp16, var_3143_cast_fp16, var_3159_cast_fp16, var_3159_cast_fp16))[name = string("key_heads_31_cast_fp16")]; + bool value_heads_31_interleave_0 = const()[name = string("value_heads_31_interleave_0"), val = bool(false)]; + tensor value_heads_31_cast_fp16 = concat(axis = var_2889, interleave = value_heads_31_interleave_0, values = (var_3051_cast_fp16, var_3051_cast_fp16, var_3067_cast_fp16, var_3067_cast_fp16, var_3083_cast_fp16, var_3083_cast_fp16, var_3099_cast_fp16, var_3099_cast_fp16, var_3115_cast_fp16, var_3115_cast_fp16, var_3131_cast_fp16, var_3131_cast_fp16, var_3147_cast_fp16, var_3147_cast_fp16, var_3163_cast_fp16, var_3163_cast_fp16))[name = string("value_heads_31_cast_fp16")]; + fp16 var_3186_to_fp16 = const()[name = string("op_3186_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_3187_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3186_to_fp16)[name = string("op_3187_cast_fp16")]; + bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)]; + bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_3187_cast_fp16, y = key_heads_31_cast_fp16)[name = string("mh_w_29_cast_fp16")]; + tensor mh_w_31_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_31_cast_fp16")]; + tensor var_3199_cast_fp16 = softmax(axis = var_2871, x = mh_w_31_cast_fp16)[name = string("op_3199_cast_fp16")]; + bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)]; + bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = value_heads_31_cast_fp16, y = var_3199_cast_fp16)[name = string("attn_15_cast_fp16")]; + tensor var_3204 = const()[name = string("op_3204"), val = tensor([1, -1, 1, 1])]; + tensor input_57_cast_fp16 = reshape(shape = var_3204, x = attn_15_cast_fp16)[name = string("input_57_cast_fp16")]; + string obj_67_pad_type_0 = const()[name = string("obj_67_pad_type_0"), val = string("valid")]; + tensor obj_67_strides_0 = const()[name = string("obj_67_strides_0"), val = tensor([1, 1])]; + tensor obj_67_pad_0 = const()[name = string("obj_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_67_dilations_0 = const()[name = string("obj_67_dilations_0"), val = tensor([1, 1])]; + int32 obj_67_groups_0 = const()[name = string("obj_67_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114438400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116535616))))[name = string("layers_7_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_67_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_67_dilations_0, groups = obj_67_groups_0, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = obj_67_strides_0, weight = layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = string("obj_67_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_61_cast_fp16")]; + tensor inputs_sq_63_cast_fp16 = mul(x = inputs_61_cast_fp16, y = inputs_61_cast_fp16)[name = string("inputs_sq_63_cast_fp16")]; + tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([1])]; + bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; + tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = inputs_sq_63_cast_fp16)[name = string("variance_63_cast_fp16")]; + fp16 var_3222_to_fp16 = const()[name = string("op_3222_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3223_cast_fp16 = add(x = variance_63_cast_fp16, y = var_3222_to_fp16)[name = string("op_3223_cast_fp16")]; + fp32 var_3224_epsilon_0 = const()[name = string("op_3224_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3224_cast_fp16 = rsqrt(epsilon = var_3224_epsilon_0, x = var_3223_cast_fp16)[name = string("op_3224_cast_fp16")]; + tensor hidden_states_77_cast_fp16 = mul(x = inputs_61_cast_fp16, y = var_3224_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; + tensor w_63_to_fp16 = const()[name = string("w_63_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116536192)))]; + tensor input_59_cast_fp16 = mul(x = w_63_to_fp16, y = hidden_states_77_cast_fp16)[name = string("input_59_cast_fp16")]; + string input_61_pad_type_0 = const()[name = string("input_61_pad_type_0"), val = string("valid")]; + tensor input_61_strides_0 = const()[name = string("input_61_strides_0"), val = tensor([1, 1])]; + tensor input_61_pad_0 = const()[name = string("input_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_61_dilations_0 = const()[name = string("input_61_dilations_0"), val = tensor([1, 1])]; + int32 input_61_groups_0 = const()[name = string("input_61_groups_0"), val = int32(1)]; + tensor layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116538304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119684096))))[name = string("layers_7_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_61_cast_fp16 = conv(dilations = input_61_dilations_0, groups = input_61_groups_0, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = input_61_strides_0, weight = layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor var_3238_cast_fp16 = silu(x = input_61_cast_fp16)[name = string("op_3238_cast_fp16")]; + string var_3244_pad_type_0 = const()[name = string("op_3244_pad_type_0"), val = string("valid")]; + tensor var_3244_strides_0 = const()[name = string("op_3244_strides_0"), val = tensor([1, 1])]; + tensor var_3244_pad_0 = const()[name = string("op_3244_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3244_dilations_0 = const()[name = string("op_3244_dilations_0"), val = tensor([1, 1])]; + int32 var_3244_groups_0 = const()[name = string("op_3244_groups_0"), val = int32(1)]; + tensor layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119684672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122830464))))[name = string("layers_7_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_3244_cast_fp16 = conv(dilations = var_3244_dilations_0, groups = var_3244_groups_0, pad = var_3244_pad_0, pad_type = var_3244_pad_type_0, strides = var_3244_strides_0, weight = layers_7_mlp_up_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_3244_cast_fp16")]; + tensor input_63_cast_fp16 = mul(x = var_3238_cast_fp16, y = var_3244_cast_fp16)[name = string("input_63_cast_fp16")]; + string hidden_states_79_pad_type_0 = const()[name = string("hidden_states_79_pad_type_0"), val = string("valid")]; + tensor hidden_states_79_strides_0 = const()[name = string("hidden_states_79_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_79_pad_0 = const()[name = string("hidden_states_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_79_dilations_0 = const()[name = string("hidden_states_79_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_79_groups_0 = const()[name = string("hidden_states_79_groups_0"), val = int32(1)]; + tensor layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122831040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125976832))))[name = string("layers_7_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_79_cast_fp16 = conv(dilations = hidden_states_79_dilations_0, groups = hidden_states_79_groups_0, pad = hidden_states_79_pad_0, pad_type = hidden_states_79_pad_type_0, strides = hidden_states_79_strides_0, weight = layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("inputs_63_cast_fp16")]; + int32 var_3258 = const()[name = string("op_3258"), val = int32(3)]; + int32 var_3268 = const()[name = string("op_3268"), val = int32(-2)]; + int32 var_3276 = const()[name = string("op_3276"), val = int32(1)]; + tensor inputs_sq_65_cast_fp16 = mul(x = inputs_63_cast_fp16, y = inputs_63_cast_fp16)[name = string("inputs_sq_65_cast_fp16")]; + tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([1])]; + bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; + tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = inputs_sq_65_cast_fp16)[name = string("variance_65_cast_fp16")]; + fp16 var_3288_to_fp16 = const()[name = string("op_3288_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3289_cast_fp16 = add(x = variance_65_cast_fp16, y = var_3288_to_fp16)[name = string("op_3289_cast_fp16")]; + fp32 var_3290_epsilon_0 = const()[name = string("op_3290_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3290_cast_fp16 = rsqrt(epsilon = var_3290_epsilon_0, x = var_3289_cast_fp16)[name = string("op_3290_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = mul(x = inputs_63_cast_fp16, y = var_3290_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor w_65_to_fp16 = const()[name = string("w_65_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125977408)))]; + tensor obj_69_cast_fp16 = mul(x = w_65_to_fp16, y = hidden_states_81_cast_fp16)[name = string("obj_69_cast_fp16")]; + string query_49_pad_type_0 = const()[name = string("query_49_pad_type_0"), val = string("valid")]; + tensor query_49_strides_0 = const()[name = string("query_49_strides_0"), val = tensor([1, 1])]; + tensor query_49_pad_0 = const()[name = string("query_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_49_dilations_0 = const()[name = string("query_49_dilations_0"), val = tensor([1, 1])]; + int32 query_49_groups_0 = const()[name = string("query_49_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125979520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128076736))))[name = string("layers_8_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_49_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_49_dilations_0, groups = query_49_groups_0, pad = query_49_pad_0, pad_type = query_49_pad_type_0, strides = query_49_strides_0, weight = layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("query_49_cast_fp16")]; + string current_key_33_pad_type_0 = const()[name = string("current_key_33_pad_type_0"), val = string("valid")]; + tensor current_key_33_strides_0 = const()[name = string("current_key_33_strides_0"), val = tensor([1, 1])]; + tensor current_key_33_pad_0 = const()[name = string("current_key_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_33_dilations_0 = const()[name = string("current_key_33_dilations_0"), val = tensor([1, 1])]; + int32 current_key_33_groups_0 = const()[name = string("current_key_33_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128077312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129125952))))[name = string("layers_8_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_33_cast_fp16 = conv(dilations = current_key_33_dilations_0, groups = current_key_33_groups_0, pad = current_key_33_pad_0, pad_type = current_key_33_pad_type_0, strides = current_key_33_strides_0, weight = layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("current_key_33_cast_fp16")]; + string current_value_17_pad_type_0 = const()[name = string("current_value_17_pad_type_0"), val = string("valid")]; + tensor current_value_17_strides_0 = const()[name = string("current_value_17_strides_0"), val = tensor([1, 1])]; + tensor current_value_17_pad_0 = const()[name = string("current_value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_17_dilations_0 = const()[name = string("current_value_17_dilations_0"), val = tensor([1, 1])]; + int32 current_value_17_groups_0 = const()[name = string("current_value_17_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129126528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130175168))))[name = string("layers_8_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_17_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_17_dilations_0, groups = current_value_17_groups_0, pad = current_value_17_pad_0, pad_type = current_value_17_pad_type_0, strides = current_value_17_strides_0, weight = layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("current_value_17_cast_fp16")]; + tensor var_3327 = const()[name = string("op_3327"), val = tensor([16, 128, 1, 1])]; + tensor inputs_65_cast_fp16 = reshape(shape = var_3327, x = query_49_cast_fp16)[name = string("inputs_65_cast_fp16")]; + tensor inputs_sq_67_cast_fp16 = mul(x = inputs_65_cast_fp16, y = inputs_65_cast_fp16)[name = string("inputs_sq_67_cast_fp16")]; + tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([1])]; + bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; + tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = inputs_sq_67_cast_fp16)[name = string("variance_67_cast_fp16")]; + fp16 var_3333_to_fp16 = const()[name = string("op_3333_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3334_cast_fp16 = add(x = variance_67_cast_fp16, y = var_3333_to_fp16)[name = string("op_3334_cast_fp16")]; + fp32 var_3335_epsilon_0 = const()[name = string("op_3335_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3335_cast_fp16 = rsqrt(epsilon = var_3335_epsilon_0, x = var_3334_cast_fp16)[name = string("op_3335_cast_fp16")]; + tensor hidden_states_83_cast_fp16 = mul(x = inputs_65_cast_fp16, y = var_3335_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor w_67_to_fp16 = const()[name = string("w_67_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130175744)))]; + tensor query_normed_17_cast_fp16 = mul(x = w_67_to_fp16, y = hidden_states_83_cast_fp16)[name = string("query_normed_17_cast_fp16")]; + tensor var_3343 = const()[name = string("op_3343"), val = tensor([8, 128, 1, 1])]; + tensor inputs_67_cast_fp16 = reshape(shape = var_3343, x = current_key_33_cast_fp16)[name = string("inputs_67_cast_fp16")]; + tensor inputs_sq_69_cast_fp16 = mul(x = inputs_67_cast_fp16, y = inputs_67_cast_fp16)[name = string("inputs_sq_69_cast_fp16")]; + tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([1])]; + bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; + tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = inputs_sq_69_cast_fp16)[name = string("variance_69_cast_fp16")]; + fp16 var_3349_to_fp16 = const()[name = string("op_3349_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3350_cast_fp16 = add(x = variance_69_cast_fp16, y = var_3349_to_fp16)[name = string("op_3350_cast_fp16")]; + fp32 var_3351_epsilon_0 = const()[name = string("op_3351_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3351_cast_fp16 = rsqrt(epsilon = var_3351_epsilon_0, x = var_3350_cast_fp16)[name = string("op_3351_cast_fp16")]; + tensor hidden_states_85_cast_fp16 = mul(x = inputs_67_cast_fp16, y = var_3351_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; + tensor w_69_to_fp16 = const()[name = string("w_69_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130176064)))]; + tensor current_key_normed_17_cast_fp16 = mul(x = w_69_to_fp16, y = hidden_states_85_cast_fp16)[name = string("current_key_normed_17_cast_fp16")]; + tensor var_3369 = const()[name = string("op_3369"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_49_cast_fp16 = reshape(shape = var_3369, x = query_normed_17_cast_fp16)[name = string("mh_q_49_cast_fp16")]; + tensor var_3371 = const()[name = string("op_3371"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_33_cast_fp16 = reshape(shape = var_3371, x = current_key_normed_17_cast_fp16)[name = string("mh_k_33_cast_fp16")]; + tensor var_3375_cast_fp16 = mul(x = mh_q_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3375_cast_fp16")]; + tensor var_3380_begin_0 = const()[name = string("op_3380_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3380_end_0 = const()[name = string("op_3380_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_3380_end_mask_0 = const()[name = string("op_3380_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3380_cast_fp16 = slice_by_index(begin = var_3380_begin_0, end = var_3380_end_0, end_mask = var_3380_end_mask_0, x = mh_q_49_cast_fp16)[name = string("op_3380_cast_fp16")]; + tensor var_3386_begin_0 = const()[name = string("op_3386_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3386_end_0 = const()[name = string("op_3386_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_3386_end_mask_0 = const()[name = string("op_3386_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3386_cast_fp16 = slice_by_index(begin = var_3386_begin_0, end = var_3386_end_0, end_mask = var_3386_end_mask_0, x = mh_q_49_cast_fp16)[name = string("op_3386_cast_fp16")]; + fp16 const_201_promoted_to_fp16 = const()[name = string("const_201_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3388_cast_fp16 = mul(x = var_3386_cast_fp16, y = const_201_promoted_to_fp16)[name = string("op_3388_cast_fp16")]; + bool var_3390_interleave_0 = const()[name = string("op_3390_interleave_0"), val = bool(false)]; + tensor var_3390_cast_fp16 = concat(axis = var_3268, interleave = var_3390_interleave_0, values = (var_3388_cast_fp16, var_3380_cast_fp16))[name = string("op_3390_cast_fp16")]; + tensor var_3391_cast_fp16 = mul(x = var_3390_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3391_cast_fp16")]; + tensor mh_q_51_cast_fp16 = add(x = var_3375_cast_fp16, y = var_3391_cast_fp16)[name = string("mh_q_51_cast_fp16")]; + tensor var_3393_cast_fp16 = mul(x = mh_k_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3393_cast_fp16")]; + tensor var_3398_begin_0 = const()[name = string("op_3398_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3398_end_0 = const()[name = string("op_3398_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_3398_end_mask_0 = const()[name = string("op_3398_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3398_cast_fp16 = slice_by_index(begin = var_3398_begin_0, end = var_3398_end_0, end_mask = var_3398_end_mask_0, x = mh_k_33_cast_fp16)[name = string("op_3398_cast_fp16")]; + tensor var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3404_end_0 = const()[name = string("op_3404_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = var_3404_end_0, end_mask = var_3404_end_mask_0, x = mh_k_33_cast_fp16)[name = string("op_3404_cast_fp16")]; + fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3406_cast_fp16 = mul(x = var_3404_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_3406_cast_fp16")]; + bool var_3408_interleave_0 = const()[name = string("op_3408_interleave_0"), val = bool(false)]; + tensor var_3408_cast_fp16 = concat(axis = var_3268, interleave = var_3408_interleave_0, values = (var_3406_cast_fp16, var_3398_cast_fp16))[name = string("op_3408_cast_fp16")]; + tensor var_3409_cast_fp16 = mul(x = var_3408_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3409_cast_fp16")]; + tensor mh_k_35_cast_fp16 = add(x = var_3393_cast_fp16, y = var_3409_cast_fp16)[name = string("mh_k_35_cast_fp16")]; + tensor var_3413 = const()[name = string("op_3413"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_35_cast_fp16 = reshape(shape = var_3413, x = mh_k_35_cast_fp16)[name = string("current_key_35_cast_fp16")]; + tensor var_3420_cast_fp16 = mul(x = var_101_cast_fp16_8, y = var_323_cast_fp16)[name = string("op_3420_cast_fp16")]; + tensor var_3421_cast_fp16 = mul(x = current_key_35_cast_fp16, y = var_321_cast_fp16)[name = string("op_3421_cast_fp16")]; + tensor key_51_cast_fp16 = add(x = var_3420_cast_fp16, y = var_3421_cast_fp16)[name = string("key_51_cast_fp16")]; + tensor var_3424_cast_fp16 = mul(x = var_132_cast_fp16_8, y = var_323_cast_fp16)[name = string("op_3424_cast_fp16")]; + tensor var_3425_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_321_cast_fp16)[name = string("op_3425_cast_fp16")]; + tensor value_33_cast_fp16 = add(x = var_3424_cast_fp16, y = var_3425_cast_fp16)[name = string("value_33_cast_fp16")]; + tensor var_3429 = const()[name = string("op_3429"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_33_cast_fp16 = reshape(shape = var_3429, x = key_51_cast_fp16)[name = string("key_heads_33_cast_fp16")]; + tensor var_3431 = const()[name = string("op_3431"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_33_cast_fp16 = reshape(shape = var_3431, x = value_33_cast_fp16)[name = string("value_heads_33_cast_fp16")]; + tensor var_3434_begin_0 = const()[name = string("op_3434_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3434_end_0 = const()[name = string("op_3434_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3434_end_mask_0 = const()[name = string("op_3434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3434_cast_fp16 = slice_by_index(begin = var_3434_begin_0, end = var_3434_end_0, end_mask = var_3434_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3434_cast_fp16")]; + tensor var_3438_begin_0 = const()[name = string("op_3438_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3438_end_0 = const()[name = string("op_3438_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3438_end_mask_0 = const()[name = string("op_3438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3438_cast_fp16 = slice_by_index(begin = var_3438_begin_0, end = var_3438_end_0, end_mask = var_3438_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3438_cast_fp16")]; + tensor var_3450_begin_0 = const()[name = string("op_3450_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_3450_end_0 = const()[name = string("op_3450_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_3450_end_mask_0 = const()[name = string("op_3450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3450_cast_fp16 = slice_by_index(begin = var_3450_begin_0, end = var_3450_end_0, end_mask = var_3450_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3450_cast_fp16")]; + tensor var_3454_begin_0 = const()[name = string("op_3454_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_3454_end_0 = const()[name = string("op_3454_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_3454_end_mask_0 = const()[name = string("op_3454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3454_cast_fp16 = slice_by_index(begin = var_3454_begin_0, end = var_3454_end_0, end_mask = var_3454_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3454_cast_fp16")]; + tensor var_3466_begin_0 = const()[name = string("op_3466_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3466_end_0 = const()[name = string("op_3466_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_3466_end_mask_0 = const()[name = string("op_3466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3466_cast_fp16 = slice_by_index(begin = var_3466_begin_0, end = var_3466_end_0, end_mask = var_3466_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3466_cast_fp16")]; + tensor var_3470_begin_0 = const()[name = string("op_3470_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3470_end_0 = const()[name = string("op_3470_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_3470_end_mask_0 = const()[name = string("op_3470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3470_cast_fp16 = slice_by_index(begin = var_3470_begin_0, end = var_3470_end_0, end_mask = var_3470_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3470_cast_fp16")]; + tensor var_3482_begin_0 = const()[name = string("op_3482_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3482_end_0 = const()[name = string("op_3482_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_3482_end_mask_0 = const()[name = string("op_3482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3482_cast_fp16 = slice_by_index(begin = var_3482_begin_0, end = var_3482_end_0, end_mask = var_3482_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3482_cast_fp16")]; + tensor var_3486_begin_0 = const()[name = string("op_3486_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3486_end_0 = const()[name = string("op_3486_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_3486_end_mask_0 = const()[name = string("op_3486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3486_cast_fp16 = slice_by_index(begin = var_3486_begin_0, end = var_3486_end_0, end_mask = var_3486_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3486_cast_fp16")]; + tensor var_3498_begin_0 = const()[name = string("op_3498_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3498_end_0 = const()[name = string("op_3498_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_3498_end_mask_0 = const()[name = string("op_3498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3498_cast_fp16 = slice_by_index(begin = var_3498_begin_0, end = var_3498_end_0, end_mask = var_3498_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3498_cast_fp16")]; + tensor var_3502_begin_0 = const()[name = string("op_3502_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3502_end_0 = const()[name = string("op_3502_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_3502_end_mask_0 = const()[name = string("op_3502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3502_cast_fp16 = slice_by_index(begin = var_3502_begin_0, end = var_3502_end_0, end_mask = var_3502_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3502_cast_fp16")]; + tensor var_3514_begin_0 = const()[name = string("op_3514_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3514_end_0 = const()[name = string("op_3514_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_3514_end_mask_0 = const()[name = string("op_3514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3514_cast_fp16 = slice_by_index(begin = var_3514_begin_0, end = var_3514_end_0, end_mask = var_3514_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3514_cast_fp16")]; + tensor var_3518_begin_0 = const()[name = string("op_3518_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3518_end_0 = const()[name = string("op_3518_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_3518_end_mask_0 = const()[name = string("op_3518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3518_cast_fp16 = slice_by_index(begin = var_3518_begin_0, end = var_3518_end_0, end_mask = var_3518_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3518_cast_fp16")]; + tensor var_3530_begin_0 = const()[name = string("op_3530_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3530_end_0 = const()[name = string("op_3530_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_3530_end_mask_0 = const()[name = string("op_3530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3530_cast_fp16 = slice_by_index(begin = var_3530_begin_0, end = var_3530_end_0, end_mask = var_3530_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3530_cast_fp16")]; + tensor var_3534_begin_0 = const()[name = string("op_3534_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3534_end_0 = const()[name = string("op_3534_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_3534_end_mask_0 = const()[name = string("op_3534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3534_cast_fp16 = slice_by_index(begin = var_3534_begin_0, end = var_3534_end_0, end_mask = var_3534_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3534_cast_fp16")]; + tensor var_3546_begin_0 = const()[name = string("op_3546_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3546_end_0 = const()[name = string("op_3546_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3546_end_mask_0 = const()[name = string("op_3546_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3546_cast_fp16 = slice_by_index(begin = var_3546_begin_0, end = var_3546_end_0, end_mask = var_3546_end_mask_0, x = key_heads_33_cast_fp16)[name = string("op_3546_cast_fp16")]; + tensor var_3550_begin_0 = const()[name = string("op_3550_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3550_end_0 = const()[name = string("op_3550_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3550_end_mask_0 = const()[name = string("op_3550_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3550_cast_fp16 = slice_by_index(begin = var_3550_begin_0, end = var_3550_end_0, end_mask = var_3550_end_mask_0, x = value_heads_33_cast_fp16)[name = string("op_3550_cast_fp16")]; + bool key_heads_35_interleave_0 = const()[name = string("key_heads_35_interleave_0"), val = bool(false)]; + tensor key_heads_35_cast_fp16 = concat(axis = var_3276, interleave = key_heads_35_interleave_0, values = (var_3434_cast_fp16, var_3434_cast_fp16, var_3450_cast_fp16, var_3450_cast_fp16, var_3466_cast_fp16, var_3466_cast_fp16, var_3482_cast_fp16, var_3482_cast_fp16, var_3498_cast_fp16, var_3498_cast_fp16, var_3514_cast_fp16, var_3514_cast_fp16, var_3530_cast_fp16, var_3530_cast_fp16, var_3546_cast_fp16, var_3546_cast_fp16))[name = string("key_heads_35_cast_fp16")]; + bool value_heads_35_interleave_0 = const()[name = string("value_heads_35_interleave_0"), val = bool(false)]; + tensor value_heads_35_cast_fp16 = concat(axis = var_3276, interleave = value_heads_35_interleave_0, values = (var_3438_cast_fp16, var_3438_cast_fp16, var_3454_cast_fp16, var_3454_cast_fp16, var_3470_cast_fp16, var_3470_cast_fp16, var_3486_cast_fp16, var_3486_cast_fp16, var_3502_cast_fp16, var_3502_cast_fp16, var_3518_cast_fp16, var_3518_cast_fp16, var_3534_cast_fp16, var_3534_cast_fp16, var_3550_cast_fp16, var_3550_cast_fp16))[name = string("value_heads_35_cast_fp16")]; + fp16 var_3573_to_fp16 = const()[name = string("op_3573_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_3574_cast_fp16 = mul(x = mh_q_51_cast_fp16, y = var_3573_to_fp16)[name = string("op_3574_cast_fp16")]; + bool mh_w_33_transpose_x_0 = const()[name = string("mh_w_33_transpose_x_0"), val = bool(true)]; + bool mh_w_33_transpose_y_0 = const()[name = string("mh_w_33_transpose_y_0"), val = bool(false)]; + tensor mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_3574_cast_fp16, y = key_heads_35_cast_fp16)[name = string("mh_w_33_cast_fp16")]; + tensor mh_w_35_cast_fp16 = add(x = mh_w_33_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_35_cast_fp16")]; + tensor var_3586_cast_fp16 = softmax(axis = var_3258, x = mh_w_35_cast_fp16)[name = string("op_3586_cast_fp16")]; + bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)]; + bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = value_heads_35_cast_fp16, y = var_3586_cast_fp16)[name = string("attn_17_cast_fp16")]; + tensor var_3591 = const()[name = string("op_3591"), val = tensor([1, -1, 1, 1])]; + tensor input_65_cast_fp16 = reshape(shape = var_3591, x = attn_17_cast_fp16)[name = string("input_65_cast_fp16")]; + string obj_75_pad_type_0 = const()[name = string("obj_75_pad_type_0"), val = string("valid")]; + tensor obj_75_strides_0 = const()[name = string("obj_75_strides_0"), val = tensor([1, 1])]; + tensor obj_75_pad_0 = const()[name = string("obj_75_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_75_dilations_0 = const()[name = string("obj_75_dilations_0"), val = tensor([1, 1])]; + int32 obj_75_groups_0 = const()[name = string("obj_75_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130176384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132273600))))[name = string("layers_8_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_75_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_75_dilations_0, groups = obj_75_groups_0, pad = obj_75_pad_0, pad_type = obj_75_pad_type_0, strides = obj_75_strides_0, weight = layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("obj_75_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_69_cast_fp16")]; + tensor inputs_sq_71_cast_fp16 = mul(x = inputs_69_cast_fp16, y = inputs_69_cast_fp16)[name = string("inputs_sq_71_cast_fp16")]; + tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([1])]; + bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; + tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = inputs_sq_71_cast_fp16)[name = string("variance_71_cast_fp16")]; + fp16 var_3609_to_fp16 = const()[name = string("op_3609_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3610_cast_fp16 = add(x = variance_71_cast_fp16, y = var_3609_to_fp16)[name = string("op_3610_cast_fp16")]; + fp32 var_3611_epsilon_0 = const()[name = string("op_3611_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3611_cast_fp16 = rsqrt(epsilon = var_3611_epsilon_0, x = var_3610_cast_fp16)[name = string("op_3611_cast_fp16")]; + tensor hidden_states_87_cast_fp16 = mul(x = inputs_69_cast_fp16, y = var_3611_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; + tensor w_71_to_fp16 = const()[name = string("w_71_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132274176)))]; + tensor input_67_cast_fp16 = mul(x = w_71_to_fp16, y = hidden_states_87_cast_fp16)[name = string("input_67_cast_fp16")]; + string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; + tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; + tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; + int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; + tensor layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132276288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135422080))))[name = string("layers_8_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_69_cast_fp16 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("input_69_cast_fp16")]; + tensor var_3625_cast_fp16 = silu(x = input_69_cast_fp16)[name = string("op_3625_cast_fp16")]; + string var_3631_pad_type_0 = const()[name = string("op_3631_pad_type_0"), val = string("valid")]; + tensor var_3631_strides_0 = const()[name = string("op_3631_strides_0"), val = tensor([1, 1])]; + tensor var_3631_pad_0 = const()[name = string("op_3631_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3631_dilations_0 = const()[name = string("op_3631_dilations_0"), val = tensor([1, 1])]; + int32 var_3631_groups_0 = const()[name = string("op_3631_groups_0"), val = int32(1)]; + tensor layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135422656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138568448))))[name = string("layers_8_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_3631_cast_fp16 = conv(dilations = var_3631_dilations_0, groups = var_3631_groups_0, pad = var_3631_pad_0, pad_type = var_3631_pad_type_0, strides = var_3631_strides_0, weight = layers_8_mlp_up_proj_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("op_3631_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_3625_cast_fp16, y = var_3631_cast_fp16)[name = string("input_71_cast_fp16")]; + string hidden_states_89_pad_type_0 = const()[name = string("hidden_states_89_pad_type_0"), val = string("valid")]; + tensor hidden_states_89_strides_0 = const()[name = string("hidden_states_89_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_89_pad_0 = const()[name = string("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_89_dilations_0 = const()[name = string("hidden_states_89_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_89_groups_0 = const()[name = string("hidden_states_89_groups_0"), val = int32(1)]; + tensor layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138569024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141714816))))[name = string("layers_8_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_89_cast_fp16 = conv(dilations = hidden_states_89_dilations_0, groups = hidden_states_89_groups_0, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = hidden_states_89_strides_0, weight = layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("inputs_71_cast_fp16")]; + int32 var_3645 = const()[name = string("op_3645"), val = int32(3)]; + int32 var_3655 = const()[name = string("op_3655"), val = int32(-2)]; + int32 var_3663 = const()[name = string("op_3663"), val = int32(1)]; + tensor inputs_sq_73_cast_fp16 = mul(x = inputs_71_cast_fp16, y = inputs_71_cast_fp16)[name = string("inputs_sq_73_cast_fp16")]; + tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([1])]; + bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; + tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = inputs_sq_73_cast_fp16)[name = string("variance_73_cast_fp16")]; + fp16 var_3675_to_fp16 = const()[name = string("op_3675_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3676_cast_fp16 = add(x = variance_73_cast_fp16, y = var_3675_to_fp16)[name = string("op_3676_cast_fp16")]; + fp32 var_3677_epsilon_0 = const()[name = string("op_3677_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3677_cast_fp16 = rsqrt(epsilon = var_3677_epsilon_0, x = var_3676_cast_fp16)[name = string("op_3677_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = mul(x = inputs_71_cast_fp16, y = var_3677_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; + tensor w_73_to_fp16 = const()[name = string("w_73_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141715392)))]; + tensor obj_77_cast_fp16 = mul(x = w_73_to_fp16, y = hidden_states_91_cast_fp16)[name = string("obj_77_cast_fp16")]; + string query_55_pad_type_0 = const()[name = string("query_55_pad_type_0"), val = string("valid")]; + tensor query_55_strides_0 = const()[name = string("query_55_strides_0"), val = tensor([1, 1])]; + tensor query_55_pad_0 = const()[name = string("query_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_55_dilations_0 = const()[name = string("query_55_dilations_0"), val = tensor([1, 1])]; + int32 query_55_groups_0 = const()[name = string("query_55_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141717504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143814720))))[name = string("layers_9_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_55_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_55_dilations_0, groups = query_55_groups_0, pad = query_55_pad_0, pad_type = query_55_pad_type_0, strides = query_55_strides_0, weight = layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = string("query_55_cast_fp16")]; + string current_key_37_pad_type_0 = const()[name = string("current_key_37_pad_type_0"), val = string("valid")]; + tensor current_key_37_strides_0 = const()[name = string("current_key_37_strides_0"), val = tensor([1, 1])]; + tensor current_key_37_pad_0 = const()[name = string("current_key_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_37_dilations_0 = const()[name = string("current_key_37_dilations_0"), val = tensor([1, 1])]; + int32 current_key_37_groups_0 = const()[name = string("current_key_37_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143815296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144863936))))[name = string("layers_9_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_37_cast_fp16 = conv(dilations = current_key_37_dilations_0, groups = current_key_37_groups_0, pad = current_key_37_pad_0, pad_type = current_key_37_pad_type_0, strides = current_key_37_strides_0, weight = layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = string("current_key_37_cast_fp16")]; + string current_value_19_pad_type_0 = const()[name = string("current_value_19_pad_type_0"), val = string("valid")]; + tensor current_value_19_strides_0 = const()[name = string("current_value_19_strides_0"), val = tensor([1, 1])]; + tensor current_value_19_pad_0 = const()[name = string("current_value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_19_dilations_0 = const()[name = string("current_value_19_dilations_0"), val = tensor([1, 1])]; + int32 current_value_19_groups_0 = const()[name = string("current_value_19_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144864512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145913152))))[name = string("layers_9_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_19_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_19_dilations_0, groups = current_value_19_groups_0, pad = current_value_19_pad_0, pad_type = current_value_19_pad_type_0, strides = current_value_19_strides_0, weight = layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = string("current_value_19_cast_fp16")]; + tensor var_3714 = const()[name = string("op_3714"), val = tensor([16, 128, 1, 1])]; + tensor inputs_73_cast_fp16 = reshape(shape = var_3714, x = query_55_cast_fp16)[name = string("inputs_73_cast_fp16")]; + tensor inputs_sq_75_cast_fp16 = mul(x = inputs_73_cast_fp16, y = inputs_73_cast_fp16)[name = string("inputs_sq_75_cast_fp16")]; + tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([1])]; + bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; + tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = inputs_sq_75_cast_fp16)[name = string("variance_75_cast_fp16")]; + fp16 var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3721_cast_fp16 = add(x = variance_75_cast_fp16, y = var_3720_to_fp16)[name = string("op_3721_cast_fp16")]; + fp32 var_3722_epsilon_0 = const()[name = string("op_3722_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3722_cast_fp16 = rsqrt(epsilon = var_3722_epsilon_0, x = var_3721_cast_fp16)[name = string("op_3722_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = mul(x = inputs_73_cast_fp16, y = var_3722_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; + tensor w_75_to_fp16 = const()[name = string("w_75_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145913728)))]; + tensor query_normed_19_cast_fp16 = mul(x = w_75_to_fp16, y = hidden_states_93_cast_fp16)[name = string("query_normed_19_cast_fp16")]; + tensor var_3730 = const()[name = string("op_3730"), val = tensor([8, 128, 1, 1])]; + tensor inputs_75_cast_fp16 = reshape(shape = var_3730, x = current_key_37_cast_fp16)[name = string("inputs_75_cast_fp16")]; + tensor inputs_sq_77_cast_fp16 = mul(x = inputs_75_cast_fp16, y = inputs_75_cast_fp16)[name = string("inputs_sq_77_cast_fp16")]; + tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([1])]; + bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; + tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = inputs_sq_77_cast_fp16)[name = string("variance_77_cast_fp16")]; + fp16 var_3736_to_fp16 = const()[name = string("op_3736_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3737_cast_fp16 = add(x = variance_77_cast_fp16, y = var_3736_to_fp16)[name = string("op_3737_cast_fp16")]; + fp32 var_3738_epsilon_0 = const()[name = string("op_3738_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3738_cast_fp16 = rsqrt(epsilon = var_3738_epsilon_0, x = var_3737_cast_fp16)[name = string("op_3738_cast_fp16")]; + tensor hidden_states_95_cast_fp16 = mul(x = inputs_75_cast_fp16, y = var_3738_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; + tensor w_77_to_fp16 = const()[name = string("w_77_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145914048)))]; + tensor current_key_normed_19_cast_fp16 = mul(x = w_77_to_fp16, y = hidden_states_95_cast_fp16)[name = string("current_key_normed_19_cast_fp16")]; + tensor var_3756 = const()[name = string("op_3756"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_55_cast_fp16 = reshape(shape = var_3756, x = query_normed_19_cast_fp16)[name = string("mh_q_55_cast_fp16")]; + tensor var_3758 = const()[name = string("op_3758"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_37_cast_fp16 = reshape(shape = var_3758, x = current_key_normed_19_cast_fp16)[name = string("mh_k_37_cast_fp16")]; + tensor var_3762_cast_fp16 = mul(x = mh_q_55_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3762_cast_fp16")]; + tensor var_3767_begin_0 = const()[name = string("op_3767_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3767_end_0 = const()[name = string("op_3767_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_3767_end_mask_0 = const()[name = string("op_3767_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3767_cast_fp16 = slice_by_index(begin = var_3767_begin_0, end = var_3767_end_0, end_mask = var_3767_end_mask_0, x = mh_q_55_cast_fp16)[name = string("op_3767_cast_fp16")]; + tensor var_3773_begin_0 = const()[name = string("op_3773_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3773_end_0 = const()[name = string("op_3773_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_3773_end_mask_0 = const()[name = string("op_3773_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3773_cast_fp16 = slice_by_index(begin = var_3773_begin_0, end = var_3773_end_0, end_mask = var_3773_end_mask_0, x = mh_q_55_cast_fp16)[name = string("op_3773_cast_fp16")]; + fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3775_cast_fp16 = mul(x = var_3773_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_3775_cast_fp16")]; + bool var_3777_interleave_0 = const()[name = string("op_3777_interleave_0"), val = bool(false)]; + tensor var_3777_cast_fp16 = concat(axis = var_3655, interleave = var_3777_interleave_0, values = (var_3775_cast_fp16, var_3767_cast_fp16))[name = string("op_3777_cast_fp16")]; + tensor var_3778_cast_fp16 = mul(x = var_3777_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3778_cast_fp16")]; + tensor mh_q_57_cast_fp16 = add(x = var_3762_cast_fp16, y = var_3778_cast_fp16)[name = string("mh_q_57_cast_fp16")]; + tensor var_3780_cast_fp16 = mul(x = mh_k_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3780_cast_fp16")]; + tensor var_3785_begin_0 = const()[name = string("op_3785_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3785_end_0 = const()[name = string("op_3785_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_3785_end_mask_0 = const()[name = string("op_3785_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3785_cast_fp16 = slice_by_index(begin = var_3785_begin_0, end = var_3785_end_0, end_mask = var_3785_end_mask_0, x = mh_k_37_cast_fp16)[name = string("op_3785_cast_fp16")]; + tensor var_3791_begin_0 = const()[name = string("op_3791_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_3791_end_0 = const()[name = string("op_3791_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_3791_end_mask_0 = const()[name = string("op_3791_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = var_3791_end_0, end_mask = var_3791_end_mask_0, x = mh_k_37_cast_fp16)[name = string("op_3791_cast_fp16")]; + fp16 const_227_promoted_to_fp16 = const()[name = string("const_227_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3793_cast_fp16 = mul(x = var_3791_cast_fp16, y = const_227_promoted_to_fp16)[name = string("op_3793_cast_fp16")]; + bool var_3795_interleave_0 = const()[name = string("op_3795_interleave_0"), val = bool(false)]; + tensor var_3795_cast_fp16 = concat(axis = var_3655, interleave = var_3795_interleave_0, values = (var_3793_cast_fp16, var_3785_cast_fp16))[name = string("op_3795_cast_fp16")]; + tensor var_3796_cast_fp16 = mul(x = var_3795_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3796_cast_fp16")]; + tensor mh_k_39_cast_fp16 = add(x = var_3780_cast_fp16, y = var_3796_cast_fp16)[name = string("mh_k_39_cast_fp16")]; + tensor var_3800 = const()[name = string("op_3800"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_39_cast_fp16 = reshape(shape = var_3800, x = mh_k_39_cast_fp16)[name = string("current_key_39_cast_fp16")]; + tensor var_3807_cast_fp16 = mul(x = var_101_cast_fp16_9, y = var_323_cast_fp16)[name = string("op_3807_cast_fp16")]; + tensor var_3808_cast_fp16 = mul(x = current_key_39_cast_fp16, y = var_321_cast_fp16)[name = string("op_3808_cast_fp16")]; + tensor key_57_cast_fp16 = add(x = var_3807_cast_fp16, y = var_3808_cast_fp16)[name = string("key_57_cast_fp16")]; + tensor var_3811_cast_fp16 = mul(x = var_132_cast_fp16_9, y = var_323_cast_fp16)[name = string("op_3811_cast_fp16")]; + tensor var_3812_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_321_cast_fp16)[name = string("op_3812_cast_fp16")]; + tensor value_37_cast_fp16 = add(x = var_3811_cast_fp16, y = var_3812_cast_fp16)[name = string("value_37_cast_fp16")]; + tensor var_3816 = const()[name = string("op_3816"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_37_cast_fp16 = reshape(shape = var_3816, x = key_57_cast_fp16)[name = string("key_heads_37_cast_fp16")]; + tensor var_3818 = const()[name = string("op_3818"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_37_cast_fp16 = reshape(shape = var_3818, x = value_37_cast_fp16)[name = string("value_heads_37_cast_fp16")]; + tensor var_3821_begin_0 = const()[name = string("op_3821_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3821_end_0 = const()[name = string("op_3821_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3821_end_mask_0 = const()[name = string("op_3821_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3821_cast_fp16 = slice_by_index(begin = var_3821_begin_0, end = var_3821_end_0, end_mask = var_3821_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3821_cast_fp16")]; + tensor var_3825_begin_0 = const()[name = string("op_3825_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3825_end_0 = const()[name = string("op_3825_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3825_end_mask_0 = const()[name = string("op_3825_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3825_cast_fp16 = slice_by_index(begin = var_3825_begin_0, end = var_3825_end_0, end_mask = var_3825_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3825_cast_fp16")]; + tensor var_3837_begin_0 = const()[name = string("op_3837_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_3837_end_0 = const()[name = string("op_3837_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_3837_end_mask_0 = const()[name = string("op_3837_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3837_cast_fp16 = slice_by_index(begin = var_3837_begin_0, end = var_3837_end_0, end_mask = var_3837_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3837_cast_fp16")]; + tensor var_3841_begin_0 = const()[name = string("op_3841_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_3841_end_0 = const()[name = string("op_3841_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_3841_end_mask_0 = const()[name = string("op_3841_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3841_cast_fp16 = slice_by_index(begin = var_3841_begin_0, end = var_3841_end_0, end_mask = var_3841_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3841_cast_fp16")]; + tensor var_3853_begin_0 = const()[name = string("op_3853_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3853_end_0 = const()[name = string("op_3853_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_3853_end_mask_0 = const()[name = string("op_3853_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3853_cast_fp16 = slice_by_index(begin = var_3853_begin_0, end = var_3853_end_0, end_mask = var_3853_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3853_cast_fp16")]; + tensor var_3857_begin_0 = const()[name = string("op_3857_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3857_end_0 = const()[name = string("op_3857_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_3857_end_mask_0 = const()[name = string("op_3857_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3857_cast_fp16 = slice_by_index(begin = var_3857_begin_0, end = var_3857_end_0, end_mask = var_3857_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3857_cast_fp16")]; + tensor var_3869_begin_0 = const()[name = string("op_3869_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3869_end_0 = const()[name = string("op_3869_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_3869_end_mask_0 = const()[name = string("op_3869_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3869_cast_fp16 = slice_by_index(begin = var_3869_begin_0, end = var_3869_end_0, end_mask = var_3869_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3869_cast_fp16")]; + tensor var_3873_begin_0 = const()[name = string("op_3873_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3873_end_0 = const()[name = string("op_3873_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_3873_end_mask_0 = const()[name = string("op_3873_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3873_cast_fp16 = slice_by_index(begin = var_3873_begin_0, end = var_3873_end_0, end_mask = var_3873_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3873_cast_fp16")]; + tensor var_3885_begin_0 = const()[name = string("op_3885_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3885_end_0 = const()[name = string("op_3885_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_3885_end_mask_0 = const()[name = string("op_3885_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3885_cast_fp16 = slice_by_index(begin = var_3885_begin_0, end = var_3885_end_0, end_mask = var_3885_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3885_cast_fp16")]; + tensor var_3889_begin_0 = const()[name = string("op_3889_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3889_end_0 = const()[name = string("op_3889_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_3889_end_mask_0 = const()[name = string("op_3889_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3889_cast_fp16 = slice_by_index(begin = var_3889_begin_0, end = var_3889_end_0, end_mask = var_3889_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3889_cast_fp16")]; + tensor var_3901_begin_0 = const()[name = string("op_3901_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3901_end_0 = const()[name = string("op_3901_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_3901_end_mask_0 = const()[name = string("op_3901_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3901_cast_fp16 = slice_by_index(begin = var_3901_begin_0, end = var_3901_end_0, end_mask = var_3901_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3901_cast_fp16")]; + tensor var_3905_begin_0 = const()[name = string("op_3905_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3905_end_0 = const()[name = string("op_3905_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_3905_end_mask_0 = const()[name = string("op_3905_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3905_cast_fp16 = slice_by_index(begin = var_3905_begin_0, end = var_3905_end_0, end_mask = var_3905_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3905_cast_fp16")]; + tensor var_3917_begin_0 = const()[name = string("op_3917_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3917_end_0 = const()[name = string("op_3917_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_3917_end_mask_0 = const()[name = string("op_3917_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3917_cast_fp16 = slice_by_index(begin = var_3917_begin_0, end = var_3917_end_0, end_mask = var_3917_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3917_cast_fp16")]; + tensor var_3921_begin_0 = const()[name = string("op_3921_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3921_end_0 = const()[name = string("op_3921_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_3921_end_mask_0 = const()[name = string("op_3921_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3921_cast_fp16 = slice_by_index(begin = var_3921_begin_0, end = var_3921_end_0, end_mask = var_3921_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3921_cast_fp16")]; + tensor var_3933_begin_0 = const()[name = string("op_3933_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3933_end_0 = const()[name = string("op_3933_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3933_end_mask_0 = const()[name = string("op_3933_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3933_cast_fp16 = slice_by_index(begin = var_3933_begin_0, end = var_3933_end_0, end_mask = var_3933_end_mask_0, x = key_heads_37_cast_fp16)[name = string("op_3933_cast_fp16")]; + tensor var_3937_begin_0 = const()[name = string("op_3937_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3937_end_0 = const()[name = string("op_3937_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_3937_end_mask_0 = const()[name = string("op_3937_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3937_cast_fp16 = slice_by_index(begin = var_3937_begin_0, end = var_3937_end_0, end_mask = var_3937_end_mask_0, x = value_heads_37_cast_fp16)[name = string("op_3937_cast_fp16")]; + bool key_heads_39_interleave_0 = const()[name = string("key_heads_39_interleave_0"), val = bool(false)]; + tensor key_heads_39_cast_fp16 = concat(axis = var_3663, interleave = key_heads_39_interleave_0, values = (var_3821_cast_fp16, var_3821_cast_fp16, var_3837_cast_fp16, var_3837_cast_fp16, var_3853_cast_fp16, var_3853_cast_fp16, var_3869_cast_fp16, var_3869_cast_fp16, var_3885_cast_fp16, var_3885_cast_fp16, var_3901_cast_fp16, var_3901_cast_fp16, var_3917_cast_fp16, var_3917_cast_fp16, var_3933_cast_fp16, var_3933_cast_fp16))[name = string("key_heads_39_cast_fp16")]; + bool value_heads_39_interleave_0 = const()[name = string("value_heads_39_interleave_0"), val = bool(false)]; + tensor value_heads_39_cast_fp16 = concat(axis = var_3663, interleave = value_heads_39_interleave_0, values = (var_3825_cast_fp16, var_3825_cast_fp16, var_3841_cast_fp16, var_3841_cast_fp16, var_3857_cast_fp16, var_3857_cast_fp16, var_3873_cast_fp16, var_3873_cast_fp16, var_3889_cast_fp16, var_3889_cast_fp16, var_3905_cast_fp16, var_3905_cast_fp16, var_3921_cast_fp16, var_3921_cast_fp16, var_3937_cast_fp16, var_3937_cast_fp16))[name = string("value_heads_39_cast_fp16")]; + fp16 var_3960_to_fp16 = const()[name = string("op_3960_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_3961_cast_fp16 = mul(x = mh_q_57_cast_fp16, y = var_3960_to_fp16)[name = string("op_3961_cast_fp16")]; + bool mh_w_37_transpose_x_0 = const()[name = string("mh_w_37_transpose_x_0"), val = bool(true)]; + bool mh_w_37_transpose_y_0 = const()[name = string("mh_w_37_transpose_y_0"), val = bool(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_3961_cast_fp16, y = key_heads_39_cast_fp16)[name = string("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_39_cast_fp16")]; + tensor var_3973_cast_fp16 = softmax(axis = var_3645, x = mh_w_39_cast_fp16)[name = string("op_3973_cast_fp16")]; + bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)]; + bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = value_heads_39_cast_fp16, y = var_3973_cast_fp16)[name = string("attn_19_cast_fp16")]; + tensor var_3978 = const()[name = string("op_3978"), val = tensor([1, -1, 1, 1])]; + tensor input_73_cast_fp16 = reshape(shape = var_3978, x = attn_19_cast_fp16)[name = string("input_73_cast_fp16")]; + string obj_83_pad_type_0 = const()[name = string("obj_83_pad_type_0"), val = string("valid")]; + tensor obj_83_strides_0 = const()[name = string("obj_83_strides_0"), val = tensor([1, 1])]; + tensor obj_83_pad_0 = const()[name = string("obj_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_83_dilations_0 = const()[name = string("obj_83_dilations_0"), val = tensor([1, 1])]; + int32 obj_83_groups_0 = const()[name = string("obj_83_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145914368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148011584))))[name = string("layers_9_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_83_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_83_dilations_0, groups = obj_83_groups_0, pad = obj_83_pad_0, pad_type = obj_83_pad_type_0, strides = obj_83_strides_0, weight = layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("obj_83_cast_fp16")]; + tensor inputs_77_cast_fp16 = add(x = inputs_71_cast_fp16, y = obj_83_cast_fp16)[name = string("inputs_77_cast_fp16")]; + tensor inputs_sq_79_cast_fp16 = mul(x = inputs_77_cast_fp16, y = inputs_77_cast_fp16)[name = string("inputs_sq_79_cast_fp16")]; + tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([1])]; + bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; + tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = inputs_sq_79_cast_fp16)[name = string("variance_79_cast_fp16")]; + fp16 var_3996_to_fp16 = const()[name = string("op_3996_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3997_cast_fp16 = add(x = variance_79_cast_fp16, y = var_3996_to_fp16)[name = string("op_3997_cast_fp16")]; + fp32 var_3998_epsilon_0 = const()[name = string("op_3998_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3998_cast_fp16 = rsqrt(epsilon = var_3998_epsilon_0, x = var_3997_cast_fp16)[name = string("op_3998_cast_fp16")]; + tensor hidden_states_97_cast_fp16 = mul(x = inputs_77_cast_fp16, y = var_3998_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; + tensor w_79_to_fp16 = const()[name = string("w_79_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148012160)))]; + tensor input_75_cast_fp16 = mul(x = w_79_to_fp16, y = hidden_states_97_cast_fp16)[name = string("input_75_cast_fp16")]; + string input_77_pad_type_0 = const()[name = string("input_77_pad_type_0"), val = string("valid")]; + tensor input_77_strides_0 = const()[name = string("input_77_strides_0"), val = tensor([1, 1])]; + tensor input_77_pad_0 = const()[name = string("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_77_dilations_0 = const()[name = string("input_77_dilations_0"), val = tensor([1, 1])]; + int32 input_77_groups_0 = const()[name = string("input_77_groups_0"), val = int32(1)]; + tensor layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148014272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151160064))))[name = string("layers_9_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_77_cast_fp16 = conv(dilations = input_77_dilations_0, groups = input_77_groups_0, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = input_77_strides_0, weight = layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_4012_cast_fp16 = silu(x = input_77_cast_fp16)[name = string("op_4012_cast_fp16")]; + string var_4018_pad_type_0 = const()[name = string("op_4018_pad_type_0"), val = string("valid")]; + tensor var_4018_strides_0 = const()[name = string("op_4018_strides_0"), val = tensor([1, 1])]; + tensor var_4018_pad_0 = const()[name = string("op_4018_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4018_dilations_0 = const()[name = string("op_4018_dilations_0"), val = tensor([1, 1])]; + int32 var_4018_groups_0 = const()[name = string("op_4018_groups_0"), val = int32(1)]; + tensor layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151160640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154306432))))[name = string("layers_9_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_4018_cast_fp16 = conv(dilations = var_4018_dilations_0, groups = var_4018_groups_0, pad = var_4018_pad_0, pad_type = var_4018_pad_type_0, strides = var_4018_strides_0, weight = layers_9_mlp_up_proj_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_4018_cast_fp16")]; + tensor input_79_cast_fp16 = mul(x = var_4012_cast_fp16, y = var_4018_cast_fp16)[name = string("input_79_cast_fp16")]; + string hidden_states_99_pad_type_0 = const()[name = string("hidden_states_99_pad_type_0"), val = string("valid")]; + tensor hidden_states_99_strides_0 = const()[name = string("hidden_states_99_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_99_pad_0 = const()[name = string("hidden_states_99_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_99_dilations_0 = const()[name = string("hidden_states_99_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_99_groups_0 = const()[name = string("hidden_states_99_groups_0"), val = int32(1)]; + tensor layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154307008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157452800))))[name = string("layers_9_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_99_cast_fp16 = conv(dilations = hidden_states_99_dilations_0, groups = hidden_states_99_groups_0, pad = hidden_states_99_pad_0, pad_type = hidden_states_99_pad_type_0, strides = hidden_states_99_strides_0, weight = layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("inputs_79_cast_fp16")]; + int32 var_4032 = const()[name = string("op_4032"), val = int32(3)]; + int32 var_4042 = const()[name = string("op_4042"), val = int32(-2)]; + int32 var_4050 = const()[name = string("op_4050"), val = int32(1)]; + tensor inputs_sq_81_cast_fp16 = mul(x = inputs_79_cast_fp16, y = inputs_79_cast_fp16)[name = string("inputs_sq_81_cast_fp16")]; + tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([1])]; + bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; + tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = inputs_sq_81_cast_fp16)[name = string("variance_81_cast_fp16")]; + fp16 var_4062_to_fp16 = const()[name = string("op_4062_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4063_cast_fp16 = add(x = variance_81_cast_fp16, y = var_4062_to_fp16)[name = string("op_4063_cast_fp16")]; + fp32 var_4064_epsilon_0 = const()[name = string("op_4064_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4064_cast_fp16 = rsqrt(epsilon = var_4064_epsilon_0, x = var_4063_cast_fp16)[name = string("op_4064_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = mul(x = inputs_79_cast_fp16, y = var_4064_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor w_81_to_fp16 = const()[name = string("w_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157453376)))]; + tensor obj_85_cast_fp16 = mul(x = w_81_to_fp16, y = hidden_states_101_cast_fp16)[name = string("obj_85_cast_fp16")]; + string query_61_pad_type_0 = const()[name = string("query_61_pad_type_0"), val = string("valid")]; + tensor query_61_strides_0 = const()[name = string("query_61_strides_0"), val = tensor([1, 1])]; + tensor query_61_pad_0 = const()[name = string("query_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_61_dilations_0 = const()[name = string("query_61_dilations_0"), val = tensor([1, 1])]; + int32 query_61_groups_0 = const()[name = string("query_61_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157455488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159552704))))[name = string("layers_10_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_61_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_61_dilations_0, groups = query_61_groups_0, pad = query_61_pad_0, pad_type = query_61_pad_type_0, strides = query_61_strides_0, weight = layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = string("query_61_cast_fp16")]; + string current_key_41_pad_type_0 = const()[name = string("current_key_41_pad_type_0"), val = string("valid")]; + tensor current_key_41_strides_0 = const()[name = string("current_key_41_strides_0"), val = tensor([1, 1])]; + tensor current_key_41_pad_0 = const()[name = string("current_key_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_41_dilations_0 = const()[name = string("current_key_41_dilations_0"), val = tensor([1, 1])]; + int32 current_key_41_groups_0 = const()[name = string("current_key_41_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159553280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160601920))))[name = string("layers_10_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_41_cast_fp16 = conv(dilations = current_key_41_dilations_0, groups = current_key_41_groups_0, pad = current_key_41_pad_0, pad_type = current_key_41_pad_type_0, strides = current_key_41_strides_0, weight = layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = string("current_key_41_cast_fp16")]; + string current_value_21_pad_type_0 = const()[name = string("current_value_21_pad_type_0"), val = string("valid")]; + tensor current_value_21_strides_0 = const()[name = string("current_value_21_strides_0"), val = tensor([1, 1])]; + tensor current_value_21_pad_0 = const()[name = string("current_value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_21_dilations_0 = const()[name = string("current_value_21_dilations_0"), val = tensor([1, 1])]; + int32 current_value_21_groups_0 = const()[name = string("current_value_21_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160602496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161651136))))[name = string("layers_10_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_21_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_21_dilations_0, groups = current_value_21_groups_0, pad = current_value_21_pad_0, pad_type = current_value_21_pad_type_0, strides = current_value_21_strides_0, weight = layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = string("current_value_21_cast_fp16")]; + tensor var_4101 = const()[name = string("op_4101"), val = tensor([16, 128, 1, 1])]; + tensor inputs_81_cast_fp16 = reshape(shape = var_4101, x = query_61_cast_fp16)[name = string("inputs_81_cast_fp16")]; + tensor inputs_sq_83_cast_fp16 = mul(x = inputs_81_cast_fp16, y = inputs_81_cast_fp16)[name = string("inputs_sq_83_cast_fp16")]; + tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([1])]; + bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; + tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = inputs_sq_83_cast_fp16)[name = string("variance_83_cast_fp16")]; + fp16 var_4107_to_fp16 = const()[name = string("op_4107_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4108_cast_fp16 = add(x = variance_83_cast_fp16, y = var_4107_to_fp16)[name = string("op_4108_cast_fp16")]; + fp32 var_4109_epsilon_0 = const()[name = string("op_4109_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4109_cast_fp16 = rsqrt(epsilon = var_4109_epsilon_0, x = var_4108_cast_fp16)[name = string("op_4109_cast_fp16")]; + tensor hidden_states_103_cast_fp16 = mul(x = inputs_81_cast_fp16, y = var_4109_cast_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor w_83_to_fp16 = const()[name = string("w_83_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161651712)))]; + tensor query_normed_21_cast_fp16 = mul(x = w_83_to_fp16, y = hidden_states_103_cast_fp16)[name = string("query_normed_21_cast_fp16")]; + tensor var_4117 = const()[name = string("op_4117"), val = tensor([8, 128, 1, 1])]; + tensor inputs_83_cast_fp16 = reshape(shape = var_4117, x = current_key_41_cast_fp16)[name = string("inputs_83_cast_fp16")]; + tensor inputs_sq_85_cast_fp16 = mul(x = inputs_83_cast_fp16, y = inputs_83_cast_fp16)[name = string("inputs_sq_85_cast_fp16")]; + tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([1])]; + bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; + tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = inputs_sq_85_cast_fp16)[name = string("variance_85_cast_fp16")]; + fp16 var_4123_to_fp16 = const()[name = string("op_4123_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4124_cast_fp16 = add(x = variance_85_cast_fp16, y = var_4123_to_fp16)[name = string("op_4124_cast_fp16")]; + fp32 var_4125_epsilon_0 = const()[name = string("op_4125_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4125_cast_fp16 = rsqrt(epsilon = var_4125_epsilon_0, x = var_4124_cast_fp16)[name = string("op_4125_cast_fp16")]; + tensor hidden_states_105_cast_fp16 = mul(x = inputs_83_cast_fp16, y = var_4125_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; + tensor w_85_to_fp16 = const()[name = string("w_85_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161652032)))]; + tensor current_key_normed_21_cast_fp16 = mul(x = w_85_to_fp16, y = hidden_states_105_cast_fp16)[name = string("current_key_normed_21_cast_fp16")]; + tensor var_4143 = const()[name = string("op_4143"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_61_cast_fp16 = reshape(shape = var_4143, x = query_normed_21_cast_fp16)[name = string("mh_q_61_cast_fp16")]; + tensor var_4145 = const()[name = string("op_4145"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_41_cast_fp16 = reshape(shape = var_4145, x = current_key_normed_21_cast_fp16)[name = string("mh_k_41_cast_fp16")]; + tensor var_4149_cast_fp16 = mul(x = mh_q_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4149_cast_fp16")]; + tensor var_4154_begin_0 = const()[name = string("op_4154_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4154_end_0 = const()[name = string("op_4154_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_4154_end_mask_0 = const()[name = string("op_4154_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4154_cast_fp16 = slice_by_index(begin = var_4154_begin_0, end = var_4154_end_0, end_mask = var_4154_end_mask_0, x = mh_q_61_cast_fp16)[name = string("op_4154_cast_fp16")]; + tensor var_4160_begin_0 = const()[name = string("op_4160_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4160_end_0 = const()[name = string("op_4160_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_4160_end_mask_0 = const()[name = string("op_4160_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4160_cast_fp16 = slice_by_index(begin = var_4160_begin_0, end = var_4160_end_0, end_mask = var_4160_end_mask_0, x = mh_q_61_cast_fp16)[name = string("op_4160_cast_fp16")]; + fp16 const_247_promoted_to_fp16 = const()[name = string("const_247_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4162_cast_fp16 = mul(x = var_4160_cast_fp16, y = const_247_promoted_to_fp16)[name = string("op_4162_cast_fp16")]; + bool var_4164_interleave_0 = const()[name = string("op_4164_interleave_0"), val = bool(false)]; + tensor var_4164_cast_fp16 = concat(axis = var_4042, interleave = var_4164_interleave_0, values = (var_4162_cast_fp16, var_4154_cast_fp16))[name = string("op_4164_cast_fp16")]; + tensor var_4165_cast_fp16 = mul(x = var_4164_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4165_cast_fp16")]; + tensor mh_q_63_cast_fp16 = add(x = var_4149_cast_fp16, y = var_4165_cast_fp16)[name = string("mh_q_63_cast_fp16")]; + tensor var_4167_cast_fp16 = mul(x = mh_k_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4167_cast_fp16")]; + tensor var_4172_begin_0 = const()[name = string("op_4172_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4172_end_0 = const()[name = string("op_4172_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_4172_end_mask_0 = const()[name = string("op_4172_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4172_cast_fp16 = slice_by_index(begin = var_4172_begin_0, end = var_4172_end_0, end_mask = var_4172_end_mask_0, x = mh_k_41_cast_fp16)[name = string("op_4172_cast_fp16")]; + tensor var_4178_begin_0 = const()[name = string("op_4178_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4178_end_0 = const()[name = string("op_4178_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_4178_end_mask_0 = const()[name = string("op_4178_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4178_cast_fp16 = slice_by_index(begin = var_4178_begin_0, end = var_4178_end_0, end_mask = var_4178_end_mask_0, x = mh_k_41_cast_fp16)[name = string("op_4178_cast_fp16")]; + fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4180_cast_fp16 = mul(x = var_4178_cast_fp16, y = const_250_promoted_to_fp16)[name = string("op_4180_cast_fp16")]; + bool var_4182_interleave_0 = const()[name = string("op_4182_interleave_0"), val = bool(false)]; + tensor var_4182_cast_fp16 = concat(axis = var_4042, interleave = var_4182_interleave_0, values = (var_4180_cast_fp16, var_4172_cast_fp16))[name = string("op_4182_cast_fp16")]; + tensor var_4183_cast_fp16 = mul(x = var_4182_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4183_cast_fp16")]; + tensor mh_k_43_cast_fp16 = add(x = var_4167_cast_fp16, y = var_4183_cast_fp16)[name = string("mh_k_43_cast_fp16")]; + tensor var_4187 = const()[name = string("op_4187"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_43_cast_fp16 = reshape(shape = var_4187, x = mh_k_43_cast_fp16)[name = string("current_key_43_cast_fp16")]; + tensor var_4194_cast_fp16 = mul(x = var_101_cast_fp16_10, y = var_323_cast_fp16)[name = string("op_4194_cast_fp16")]; + tensor var_4195_cast_fp16 = mul(x = current_key_43_cast_fp16, y = var_321_cast_fp16)[name = string("op_4195_cast_fp16")]; + tensor key_63_cast_fp16 = add(x = var_4194_cast_fp16, y = var_4195_cast_fp16)[name = string("key_63_cast_fp16")]; + tensor var_4198_cast_fp16 = mul(x = var_132_cast_fp16_10, y = var_323_cast_fp16)[name = string("op_4198_cast_fp16")]; + tensor var_4199_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_321_cast_fp16)[name = string("op_4199_cast_fp16")]; + tensor value_41_cast_fp16 = add(x = var_4198_cast_fp16, y = var_4199_cast_fp16)[name = string("value_41_cast_fp16")]; + tensor var_4203 = const()[name = string("op_4203"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_41_cast_fp16 = reshape(shape = var_4203, x = key_63_cast_fp16)[name = string("key_heads_41_cast_fp16")]; + tensor var_4205 = const()[name = string("op_4205"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_41_cast_fp16 = reshape(shape = var_4205, x = value_41_cast_fp16)[name = string("value_heads_41_cast_fp16")]; + tensor var_4208_begin_0 = const()[name = string("op_4208_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4208_end_0 = const()[name = string("op_4208_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4208_end_mask_0 = const()[name = string("op_4208_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4208_cast_fp16 = slice_by_index(begin = var_4208_begin_0, end = var_4208_end_0, end_mask = var_4208_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4208_cast_fp16")]; + tensor var_4212_begin_0 = const()[name = string("op_4212_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4212_end_0 = const()[name = string("op_4212_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4212_end_mask_0 = const()[name = string("op_4212_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4212_cast_fp16 = slice_by_index(begin = var_4212_begin_0, end = var_4212_end_0, end_mask = var_4212_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4212_cast_fp16")]; + tensor var_4224_begin_0 = const()[name = string("op_4224_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_4224_end_0 = const()[name = string("op_4224_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_4224_end_mask_0 = const()[name = string("op_4224_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4224_cast_fp16 = slice_by_index(begin = var_4224_begin_0, end = var_4224_end_0, end_mask = var_4224_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4224_cast_fp16")]; + tensor var_4228_begin_0 = const()[name = string("op_4228_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_4228_end_0 = const()[name = string("op_4228_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_4228_end_mask_0 = const()[name = string("op_4228_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4228_cast_fp16 = slice_by_index(begin = var_4228_begin_0, end = var_4228_end_0, end_mask = var_4228_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4228_cast_fp16")]; + tensor var_4240_begin_0 = const()[name = string("op_4240_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_4240_end_0 = const()[name = string("op_4240_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_4240_end_mask_0 = const()[name = string("op_4240_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4240_cast_fp16 = slice_by_index(begin = var_4240_begin_0, end = var_4240_end_0, end_mask = var_4240_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4240_cast_fp16")]; + tensor var_4244_begin_0 = const()[name = string("op_4244_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_4244_end_0 = const()[name = string("op_4244_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_4244_end_mask_0 = const()[name = string("op_4244_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4244_cast_fp16 = slice_by_index(begin = var_4244_begin_0, end = var_4244_end_0, end_mask = var_4244_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4244_cast_fp16")]; + tensor var_4256_begin_0 = const()[name = string("op_4256_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_4256_end_0 = const()[name = string("op_4256_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_4256_end_mask_0 = const()[name = string("op_4256_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4256_cast_fp16 = slice_by_index(begin = var_4256_begin_0, end = var_4256_end_0, end_mask = var_4256_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4256_cast_fp16")]; + tensor var_4260_begin_0 = const()[name = string("op_4260_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_4260_end_0 = const()[name = string("op_4260_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_4260_end_mask_0 = const()[name = string("op_4260_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4260_cast_fp16 = slice_by_index(begin = var_4260_begin_0, end = var_4260_end_0, end_mask = var_4260_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4260_cast_fp16")]; + tensor var_4272_begin_0 = const()[name = string("op_4272_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_4272_end_0 = const()[name = string("op_4272_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_4272_end_mask_0 = const()[name = string("op_4272_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4272_cast_fp16 = slice_by_index(begin = var_4272_begin_0, end = var_4272_end_0, end_mask = var_4272_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4272_cast_fp16")]; + tensor var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_4276_end_0 = const()[name = string("op_4276_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = var_4276_end_0, end_mask = var_4276_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4276_cast_fp16")]; + tensor var_4288_begin_0 = const()[name = string("op_4288_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_4288_end_0 = const()[name = string("op_4288_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_4288_end_mask_0 = const()[name = string("op_4288_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4288_cast_fp16 = slice_by_index(begin = var_4288_begin_0, end = var_4288_end_0, end_mask = var_4288_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4288_cast_fp16")]; + tensor var_4292_begin_0 = const()[name = string("op_4292_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_4292_end_0 = const()[name = string("op_4292_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_4292_end_mask_0 = const()[name = string("op_4292_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4292_cast_fp16 = slice_by_index(begin = var_4292_begin_0, end = var_4292_end_0, end_mask = var_4292_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4292_cast_fp16")]; + tensor var_4304_begin_0 = const()[name = string("op_4304_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_4304_end_0 = const()[name = string("op_4304_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_4304_end_mask_0 = const()[name = string("op_4304_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4304_cast_fp16 = slice_by_index(begin = var_4304_begin_0, end = var_4304_end_0, end_mask = var_4304_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4304_cast_fp16")]; + tensor var_4308_begin_0 = const()[name = string("op_4308_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_4308_end_0 = const()[name = string("op_4308_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_4308_end_mask_0 = const()[name = string("op_4308_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4308_cast_fp16 = slice_by_index(begin = var_4308_begin_0, end = var_4308_end_0, end_mask = var_4308_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4308_cast_fp16")]; + tensor var_4320_begin_0 = const()[name = string("op_4320_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_4320_end_0 = const()[name = string("op_4320_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4320_end_mask_0 = const()[name = string("op_4320_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4320_cast_fp16 = slice_by_index(begin = var_4320_begin_0, end = var_4320_end_0, end_mask = var_4320_end_mask_0, x = key_heads_41_cast_fp16)[name = string("op_4320_cast_fp16")]; + tensor var_4324_begin_0 = const()[name = string("op_4324_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_4324_end_0 = const()[name = string("op_4324_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4324_end_mask_0 = const()[name = string("op_4324_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4324_cast_fp16 = slice_by_index(begin = var_4324_begin_0, end = var_4324_end_0, end_mask = var_4324_end_mask_0, x = value_heads_41_cast_fp16)[name = string("op_4324_cast_fp16")]; + bool key_heads_43_interleave_0 = const()[name = string("key_heads_43_interleave_0"), val = bool(false)]; + tensor key_heads_43_cast_fp16 = concat(axis = var_4050, interleave = key_heads_43_interleave_0, values = (var_4208_cast_fp16, var_4208_cast_fp16, var_4224_cast_fp16, var_4224_cast_fp16, var_4240_cast_fp16, var_4240_cast_fp16, var_4256_cast_fp16, var_4256_cast_fp16, var_4272_cast_fp16, var_4272_cast_fp16, var_4288_cast_fp16, var_4288_cast_fp16, var_4304_cast_fp16, var_4304_cast_fp16, var_4320_cast_fp16, var_4320_cast_fp16))[name = string("key_heads_43_cast_fp16")]; + bool value_heads_43_interleave_0 = const()[name = string("value_heads_43_interleave_0"), val = bool(false)]; + tensor value_heads_43_cast_fp16 = concat(axis = var_4050, interleave = value_heads_43_interleave_0, values = (var_4212_cast_fp16, var_4212_cast_fp16, var_4228_cast_fp16, var_4228_cast_fp16, var_4244_cast_fp16, var_4244_cast_fp16, var_4260_cast_fp16, var_4260_cast_fp16, var_4276_cast_fp16, var_4276_cast_fp16, var_4292_cast_fp16, var_4292_cast_fp16, var_4308_cast_fp16, var_4308_cast_fp16, var_4324_cast_fp16, var_4324_cast_fp16))[name = string("value_heads_43_cast_fp16")]; + fp16 var_4347_to_fp16 = const()[name = string("op_4347_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_4348_cast_fp16 = mul(x = mh_q_63_cast_fp16, y = var_4347_to_fp16)[name = string("op_4348_cast_fp16")]; + bool mh_w_41_transpose_x_0 = const()[name = string("mh_w_41_transpose_x_0"), val = bool(true)]; + bool mh_w_41_transpose_y_0 = const()[name = string("mh_w_41_transpose_y_0"), val = bool(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_4348_cast_fp16, y = key_heads_43_cast_fp16)[name = string("mh_w_41_cast_fp16")]; + tensor mh_w_43_cast_fp16 = add(x = mh_w_41_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_43_cast_fp16")]; + tensor var_4360_cast_fp16 = softmax(axis = var_4032, x = mh_w_43_cast_fp16)[name = string("op_4360_cast_fp16")]; + bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)]; + bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = value_heads_43_cast_fp16, y = var_4360_cast_fp16)[name = string("attn_21_cast_fp16")]; + tensor var_4365 = const()[name = string("op_4365"), val = tensor([1, -1, 1, 1])]; + tensor input_81_cast_fp16 = reshape(shape = var_4365, x = attn_21_cast_fp16)[name = string("input_81_cast_fp16")]; + string obj_91_pad_type_0 = const()[name = string("obj_91_pad_type_0"), val = string("valid")]; + tensor obj_91_strides_0 = const()[name = string("obj_91_strides_0"), val = tensor([1, 1])]; + tensor obj_91_pad_0 = const()[name = string("obj_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_91_dilations_0 = const()[name = string("obj_91_dilations_0"), val = tensor([1, 1])]; + int32 obj_91_groups_0 = const()[name = string("obj_91_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161652352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163749568))))[name = string("layers_10_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_91_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_91_dilations_0, groups = obj_91_groups_0, pad = obj_91_pad_0, pad_type = obj_91_pad_type_0, strides = obj_91_strides_0, weight = layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("obj_91_cast_fp16")]; + tensor inputs_85_cast_fp16 = add(x = inputs_79_cast_fp16, y = obj_91_cast_fp16)[name = string("inputs_85_cast_fp16")]; + tensor inputs_sq_87_cast_fp16 = mul(x = inputs_85_cast_fp16, y = inputs_85_cast_fp16)[name = string("inputs_sq_87_cast_fp16")]; + tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([1])]; + bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; + tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = inputs_sq_87_cast_fp16)[name = string("variance_87_cast_fp16")]; + fp16 var_4383_to_fp16 = const()[name = string("op_4383_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4384_cast_fp16 = add(x = variance_87_cast_fp16, y = var_4383_to_fp16)[name = string("op_4384_cast_fp16")]; + fp32 var_4385_epsilon_0 = const()[name = string("op_4385_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4385_cast_fp16 = rsqrt(epsilon = var_4385_epsilon_0, x = var_4384_cast_fp16)[name = string("op_4385_cast_fp16")]; + tensor hidden_states_107_cast_fp16 = mul(x = inputs_85_cast_fp16, y = var_4385_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; + tensor w_87_to_fp16 = const()[name = string("w_87_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163750144)))]; + tensor input_83_cast_fp16 = mul(x = w_87_to_fp16, y = hidden_states_107_cast_fp16)[name = string("input_83_cast_fp16")]; + string input_85_pad_type_0 = const()[name = string("input_85_pad_type_0"), val = string("valid")]; + tensor input_85_strides_0 = const()[name = string("input_85_strides_0"), val = tensor([1, 1])]; + tensor input_85_pad_0 = const()[name = string("input_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_85_dilations_0 = const()[name = string("input_85_dilations_0"), val = tensor([1, 1])]; + int32 input_85_groups_0 = const()[name = string("input_85_groups_0"), val = int32(1)]; + tensor layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163752256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166898048))))[name = string("layers_10_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_85_cast_fp16 = conv(dilations = input_85_dilations_0, groups = input_85_groups_0, pad = input_85_pad_0, pad_type = input_85_pad_type_0, strides = input_85_strides_0, weight = layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_4399_cast_fp16 = silu(x = input_85_cast_fp16)[name = string("op_4399_cast_fp16")]; + string var_4405_pad_type_0 = const()[name = string("op_4405_pad_type_0"), val = string("valid")]; + tensor var_4405_strides_0 = const()[name = string("op_4405_strides_0"), val = tensor([1, 1])]; + tensor var_4405_pad_0 = const()[name = string("op_4405_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4405_dilations_0 = const()[name = string("op_4405_dilations_0"), val = tensor([1, 1])]; + int32 var_4405_groups_0 = const()[name = string("op_4405_groups_0"), val = int32(1)]; + tensor layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166898624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170044416))))[name = string("layers_10_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_4405_cast_fp16 = conv(dilations = var_4405_dilations_0, groups = var_4405_groups_0, pad = var_4405_pad_0, pad_type = var_4405_pad_type_0, strides = var_4405_strides_0, weight = layers_10_mlp_up_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_4405_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = var_4399_cast_fp16, y = var_4405_cast_fp16)[name = string("input_87_cast_fp16")]; + string hidden_states_109_pad_type_0 = const()[name = string("hidden_states_109_pad_type_0"), val = string("valid")]; + tensor hidden_states_109_strides_0 = const()[name = string("hidden_states_109_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_109_pad_0 = const()[name = string("hidden_states_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_109_dilations_0 = const()[name = string("hidden_states_109_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_109_groups_0 = const()[name = string("hidden_states_109_groups_0"), val = int32(1)]; + tensor layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170044992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173190784))))[name = string("layers_10_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_109_cast_fp16 = conv(dilations = hidden_states_109_dilations_0, groups = hidden_states_109_groups_0, pad = hidden_states_109_pad_0, pad_type = hidden_states_109_pad_type_0, strides = hidden_states_109_strides_0, weight = layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + tensor inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = hidden_states_109_cast_fp16)[name = string("inputs_87_cast_fp16")]; + int32 var_4419 = const()[name = string("op_4419"), val = int32(3)]; + int32 var_4429 = const()[name = string("op_4429"), val = int32(-2)]; + int32 var_4437 = const()[name = string("op_4437"), val = int32(1)]; + tensor inputs_sq_89_cast_fp16 = mul(x = inputs_87_cast_fp16, y = inputs_87_cast_fp16)[name = string("inputs_sq_89_cast_fp16")]; + tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([1])]; + bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; + tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = inputs_sq_89_cast_fp16)[name = string("variance_89_cast_fp16")]; + fp16 var_4449_to_fp16 = const()[name = string("op_4449_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4450_cast_fp16 = add(x = variance_89_cast_fp16, y = var_4449_to_fp16)[name = string("op_4450_cast_fp16")]; + fp32 var_4451_epsilon_0 = const()[name = string("op_4451_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4451_cast_fp16 = rsqrt(epsilon = var_4451_epsilon_0, x = var_4450_cast_fp16)[name = string("op_4451_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = mul(x = inputs_87_cast_fp16, y = var_4451_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor w_89_to_fp16 = const()[name = string("w_89_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173191360)))]; + tensor obj_93_cast_fp16 = mul(x = w_89_to_fp16, y = hidden_states_111_cast_fp16)[name = string("obj_93_cast_fp16")]; + string query_67_pad_type_0 = const()[name = string("query_67_pad_type_0"), val = string("valid")]; + tensor query_67_strides_0 = const()[name = string("query_67_strides_0"), val = tensor([1, 1])]; + tensor query_67_pad_0 = const()[name = string("query_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_67_dilations_0 = const()[name = string("query_67_dilations_0"), val = tensor([1, 1])]; + int32 query_67_groups_0 = const()[name = string("query_67_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173193472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175290688))))[name = string("layers_11_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_67_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_67_dilations_0, groups = query_67_groups_0, pad = query_67_pad_0, pad_type = query_67_pad_type_0, strides = query_67_strides_0, weight = layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = string("query_67_cast_fp16")]; + string current_key_45_pad_type_0 = const()[name = string("current_key_45_pad_type_0"), val = string("valid")]; + tensor current_key_45_strides_0 = const()[name = string("current_key_45_strides_0"), val = tensor([1, 1])]; + tensor current_key_45_pad_0 = const()[name = string("current_key_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_45_dilations_0 = const()[name = string("current_key_45_dilations_0"), val = tensor([1, 1])]; + int32 current_key_45_groups_0 = const()[name = string("current_key_45_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175291264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176339904))))[name = string("layers_11_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_45_cast_fp16 = conv(dilations = current_key_45_dilations_0, groups = current_key_45_groups_0, pad = current_key_45_pad_0, pad_type = current_key_45_pad_type_0, strides = current_key_45_strides_0, weight = layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = string("current_key_45_cast_fp16")]; + string current_value_23_pad_type_0 = const()[name = string("current_value_23_pad_type_0"), val = string("valid")]; + tensor current_value_23_strides_0 = const()[name = string("current_value_23_strides_0"), val = tensor([1, 1])]; + tensor current_value_23_pad_0 = const()[name = string("current_value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_23_dilations_0 = const()[name = string("current_value_23_dilations_0"), val = tensor([1, 1])]; + int32 current_value_23_groups_0 = const()[name = string("current_value_23_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176340480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177389120))))[name = string("layers_11_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_23_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_23_dilations_0, groups = current_value_23_groups_0, pad = current_value_23_pad_0, pad_type = current_value_23_pad_type_0, strides = current_value_23_strides_0, weight = layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = string("current_value_23_cast_fp16")]; + tensor var_4488 = const()[name = string("op_4488"), val = tensor([16, 128, 1, 1])]; + tensor inputs_89_cast_fp16 = reshape(shape = var_4488, x = query_67_cast_fp16)[name = string("inputs_89_cast_fp16")]; + tensor inputs_sq_91_cast_fp16 = mul(x = inputs_89_cast_fp16, y = inputs_89_cast_fp16)[name = string("inputs_sq_91_cast_fp16")]; + tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([1])]; + bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; + tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = inputs_sq_91_cast_fp16)[name = string("variance_91_cast_fp16")]; + fp16 var_4494_to_fp16 = const()[name = string("op_4494_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4495_cast_fp16 = add(x = variance_91_cast_fp16, y = var_4494_to_fp16)[name = string("op_4495_cast_fp16")]; + fp32 var_4496_epsilon_0 = const()[name = string("op_4496_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4496_cast_fp16 = rsqrt(epsilon = var_4496_epsilon_0, x = var_4495_cast_fp16)[name = string("op_4496_cast_fp16")]; + tensor hidden_states_113_cast_fp16 = mul(x = inputs_89_cast_fp16, y = var_4496_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; + tensor w_91_to_fp16 = const()[name = string("w_91_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177389696)))]; + tensor query_normed_23_cast_fp16 = mul(x = w_91_to_fp16, y = hidden_states_113_cast_fp16)[name = string("query_normed_23_cast_fp16")]; + tensor var_4504 = const()[name = string("op_4504"), val = tensor([8, 128, 1, 1])]; + tensor inputs_91_cast_fp16 = reshape(shape = var_4504, x = current_key_45_cast_fp16)[name = string("inputs_91_cast_fp16")]; + tensor inputs_sq_93_cast_fp16 = mul(x = inputs_91_cast_fp16, y = inputs_91_cast_fp16)[name = string("inputs_sq_93_cast_fp16")]; + tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([1])]; + bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; + tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = inputs_sq_93_cast_fp16)[name = string("variance_93_cast_fp16")]; + fp16 var_4510_to_fp16 = const()[name = string("op_4510_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4511_cast_fp16 = add(x = variance_93_cast_fp16, y = var_4510_to_fp16)[name = string("op_4511_cast_fp16")]; + fp32 var_4512_epsilon_0 = const()[name = string("op_4512_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4512_cast_fp16 = rsqrt(epsilon = var_4512_epsilon_0, x = var_4511_cast_fp16)[name = string("op_4512_cast_fp16")]; + tensor hidden_states_115_cast_fp16 = mul(x = inputs_91_cast_fp16, y = var_4512_cast_fp16)[name = string("hidden_states_115_cast_fp16")]; + tensor w_93_to_fp16 = const()[name = string("w_93_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177390016)))]; + tensor current_key_normed_23_cast_fp16 = mul(x = w_93_to_fp16, y = hidden_states_115_cast_fp16)[name = string("current_key_normed_23_cast_fp16")]; + tensor var_4530 = const()[name = string("op_4530"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_67_cast_fp16 = reshape(shape = var_4530, x = query_normed_23_cast_fp16)[name = string("mh_q_67_cast_fp16")]; + tensor var_4532 = const()[name = string("op_4532"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_45_cast_fp16 = reshape(shape = var_4532, x = current_key_normed_23_cast_fp16)[name = string("mh_k_45_cast_fp16")]; + tensor var_4536_cast_fp16 = mul(x = mh_q_67_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4536_cast_fp16")]; + tensor var_4541_begin_0 = const()[name = string("op_4541_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4541_end_0 = const()[name = string("op_4541_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_4541_end_mask_0 = const()[name = string("op_4541_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4541_cast_fp16 = slice_by_index(begin = var_4541_begin_0, end = var_4541_end_0, end_mask = var_4541_end_mask_0, x = mh_q_67_cast_fp16)[name = string("op_4541_cast_fp16")]; + tensor var_4547_begin_0 = const()[name = string("op_4547_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4547_end_0 = const()[name = string("op_4547_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_4547_end_mask_0 = const()[name = string("op_4547_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4547_cast_fp16 = slice_by_index(begin = var_4547_begin_0, end = var_4547_end_0, end_mask = var_4547_end_mask_0, x = mh_q_67_cast_fp16)[name = string("op_4547_cast_fp16")]; + fp16 const_270_promoted_to_fp16 = const()[name = string("const_270_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4549_cast_fp16 = mul(x = var_4547_cast_fp16, y = const_270_promoted_to_fp16)[name = string("op_4549_cast_fp16")]; + bool var_4551_interleave_0 = const()[name = string("op_4551_interleave_0"), val = bool(false)]; + tensor var_4551_cast_fp16 = concat(axis = var_4429, interleave = var_4551_interleave_0, values = (var_4549_cast_fp16, var_4541_cast_fp16))[name = string("op_4551_cast_fp16")]; + tensor var_4552_cast_fp16 = mul(x = var_4551_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4552_cast_fp16")]; + tensor mh_q_69_cast_fp16 = add(x = var_4536_cast_fp16, y = var_4552_cast_fp16)[name = string("mh_q_69_cast_fp16")]; + tensor var_4554_cast_fp16 = mul(x = mh_k_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4554_cast_fp16")]; + tensor var_4559_begin_0 = const()[name = string("op_4559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4559_end_0 = const()[name = string("op_4559_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_4559_end_mask_0 = const()[name = string("op_4559_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4559_cast_fp16 = slice_by_index(begin = var_4559_begin_0, end = var_4559_end_0, end_mask = var_4559_end_mask_0, x = mh_k_45_cast_fp16)[name = string("op_4559_cast_fp16")]; + tensor var_4565_begin_0 = const()[name = string("op_4565_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4565_end_0 = const()[name = string("op_4565_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_4565_end_mask_0 = const()[name = string("op_4565_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4565_cast_fp16 = slice_by_index(begin = var_4565_begin_0, end = var_4565_end_0, end_mask = var_4565_end_mask_0, x = mh_k_45_cast_fp16)[name = string("op_4565_cast_fp16")]; + fp16 const_273_promoted_to_fp16 = const()[name = string("const_273_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4567_cast_fp16 = mul(x = var_4565_cast_fp16, y = const_273_promoted_to_fp16)[name = string("op_4567_cast_fp16")]; + bool var_4569_interleave_0 = const()[name = string("op_4569_interleave_0"), val = bool(false)]; + tensor var_4569_cast_fp16 = concat(axis = var_4429, interleave = var_4569_interleave_0, values = (var_4567_cast_fp16, var_4559_cast_fp16))[name = string("op_4569_cast_fp16")]; + tensor var_4570_cast_fp16 = mul(x = var_4569_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4570_cast_fp16")]; + tensor mh_k_47_cast_fp16 = add(x = var_4554_cast_fp16, y = var_4570_cast_fp16)[name = string("mh_k_47_cast_fp16")]; + tensor var_4574 = const()[name = string("op_4574"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_47_cast_fp16 = reshape(shape = var_4574, x = mh_k_47_cast_fp16)[name = string("current_key_47_cast_fp16")]; + tensor var_4581_cast_fp16 = mul(x = var_101_cast_fp16_11, y = var_323_cast_fp16)[name = string("op_4581_cast_fp16")]; + tensor var_4582_cast_fp16 = mul(x = current_key_47_cast_fp16, y = var_321_cast_fp16)[name = string("op_4582_cast_fp16")]; + tensor key_69_cast_fp16 = add(x = var_4581_cast_fp16, y = var_4582_cast_fp16)[name = string("key_69_cast_fp16")]; + tensor var_4585_cast_fp16 = mul(x = var_132_cast_fp16_11, y = var_323_cast_fp16)[name = string("op_4585_cast_fp16")]; + tensor var_4586_cast_fp16 = mul(x = current_value_23_cast_fp16, y = var_321_cast_fp16)[name = string("op_4586_cast_fp16")]; + tensor value_45_cast_fp16 = add(x = var_4585_cast_fp16, y = var_4586_cast_fp16)[name = string("value_45_cast_fp16")]; + tensor var_4590 = const()[name = string("op_4590"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_45_cast_fp16 = reshape(shape = var_4590, x = key_69_cast_fp16)[name = string("key_heads_45_cast_fp16")]; + tensor var_4592 = const()[name = string("op_4592"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_45_cast_fp16 = reshape(shape = var_4592, x = value_45_cast_fp16)[name = string("value_heads_45_cast_fp16")]; + tensor var_4595_begin_0 = const()[name = string("op_4595_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4595_end_0 = const()[name = string("op_4595_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4595_end_mask_0 = const()[name = string("op_4595_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4595_cast_fp16 = slice_by_index(begin = var_4595_begin_0, end = var_4595_end_0, end_mask = var_4595_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4595_cast_fp16")]; + tensor var_4599_begin_0 = const()[name = string("op_4599_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4599_end_0 = const()[name = string("op_4599_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4599_end_mask_0 = const()[name = string("op_4599_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4599_cast_fp16 = slice_by_index(begin = var_4599_begin_0, end = var_4599_end_0, end_mask = var_4599_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4599_cast_fp16")]; + tensor var_4611_begin_0 = const()[name = string("op_4611_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_4611_end_0 = const()[name = string("op_4611_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_4611_end_mask_0 = const()[name = string("op_4611_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4611_cast_fp16 = slice_by_index(begin = var_4611_begin_0, end = var_4611_end_0, end_mask = var_4611_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4611_cast_fp16")]; + tensor var_4615_begin_0 = const()[name = string("op_4615_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_4615_end_0 = const()[name = string("op_4615_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_4615_end_mask_0 = const()[name = string("op_4615_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4615_cast_fp16 = slice_by_index(begin = var_4615_begin_0, end = var_4615_end_0, end_mask = var_4615_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4615_cast_fp16")]; + tensor var_4627_begin_0 = const()[name = string("op_4627_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_4627_end_0 = const()[name = string("op_4627_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_4627_end_mask_0 = const()[name = string("op_4627_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4627_cast_fp16 = slice_by_index(begin = var_4627_begin_0, end = var_4627_end_0, end_mask = var_4627_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4627_cast_fp16")]; + tensor var_4631_begin_0 = const()[name = string("op_4631_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_4631_end_0 = const()[name = string("op_4631_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_4631_end_mask_0 = const()[name = string("op_4631_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4631_cast_fp16 = slice_by_index(begin = var_4631_begin_0, end = var_4631_end_0, end_mask = var_4631_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4631_cast_fp16")]; + tensor var_4643_begin_0 = const()[name = string("op_4643_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_4643_end_0 = const()[name = string("op_4643_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_4643_end_mask_0 = const()[name = string("op_4643_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4643_cast_fp16 = slice_by_index(begin = var_4643_begin_0, end = var_4643_end_0, end_mask = var_4643_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4643_cast_fp16")]; + tensor var_4647_begin_0 = const()[name = string("op_4647_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_4647_end_0 = const()[name = string("op_4647_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_4647_end_mask_0 = const()[name = string("op_4647_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4647_cast_fp16 = slice_by_index(begin = var_4647_begin_0, end = var_4647_end_0, end_mask = var_4647_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4647_cast_fp16")]; + tensor var_4659_begin_0 = const()[name = string("op_4659_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_4659_end_0 = const()[name = string("op_4659_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_4659_end_mask_0 = const()[name = string("op_4659_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4659_cast_fp16 = slice_by_index(begin = var_4659_begin_0, end = var_4659_end_0, end_mask = var_4659_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4659_cast_fp16")]; + tensor var_4663_begin_0 = const()[name = string("op_4663_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_4663_end_0 = const()[name = string("op_4663_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_4663_end_mask_0 = const()[name = string("op_4663_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4663_cast_fp16 = slice_by_index(begin = var_4663_begin_0, end = var_4663_end_0, end_mask = var_4663_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4663_cast_fp16")]; + tensor var_4675_begin_0 = const()[name = string("op_4675_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_4675_end_0 = const()[name = string("op_4675_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_4675_end_mask_0 = const()[name = string("op_4675_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4675_cast_fp16 = slice_by_index(begin = var_4675_begin_0, end = var_4675_end_0, end_mask = var_4675_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4675_cast_fp16")]; + tensor var_4679_begin_0 = const()[name = string("op_4679_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_4679_end_0 = const()[name = string("op_4679_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_4679_end_mask_0 = const()[name = string("op_4679_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4679_cast_fp16 = slice_by_index(begin = var_4679_begin_0, end = var_4679_end_0, end_mask = var_4679_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4679_cast_fp16")]; + tensor var_4691_begin_0 = const()[name = string("op_4691_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_4691_end_0 = const()[name = string("op_4691_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_4691_end_mask_0 = const()[name = string("op_4691_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4691_cast_fp16 = slice_by_index(begin = var_4691_begin_0, end = var_4691_end_0, end_mask = var_4691_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4691_cast_fp16")]; + tensor var_4695_begin_0 = const()[name = string("op_4695_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_4695_end_0 = const()[name = string("op_4695_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_4695_end_mask_0 = const()[name = string("op_4695_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4695_cast_fp16 = slice_by_index(begin = var_4695_begin_0, end = var_4695_end_0, end_mask = var_4695_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4695_cast_fp16")]; + tensor var_4707_begin_0 = const()[name = string("op_4707_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_4707_end_0 = const()[name = string("op_4707_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4707_end_mask_0 = const()[name = string("op_4707_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4707_cast_fp16 = slice_by_index(begin = var_4707_begin_0, end = var_4707_end_0, end_mask = var_4707_end_mask_0, x = key_heads_45_cast_fp16)[name = string("op_4707_cast_fp16")]; + tensor var_4711_begin_0 = const()[name = string("op_4711_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_4711_end_0 = const()[name = string("op_4711_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4711_end_mask_0 = const()[name = string("op_4711_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4711_cast_fp16 = slice_by_index(begin = var_4711_begin_0, end = var_4711_end_0, end_mask = var_4711_end_mask_0, x = value_heads_45_cast_fp16)[name = string("op_4711_cast_fp16")]; + bool key_heads_47_interleave_0 = const()[name = string("key_heads_47_interleave_0"), val = bool(false)]; + tensor key_heads_47_cast_fp16 = concat(axis = var_4437, interleave = key_heads_47_interleave_0, values = (var_4595_cast_fp16, var_4595_cast_fp16, var_4611_cast_fp16, var_4611_cast_fp16, var_4627_cast_fp16, var_4627_cast_fp16, var_4643_cast_fp16, var_4643_cast_fp16, var_4659_cast_fp16, var_4659_cast_fp16, var_4675_cast_fp16, var_4675_cast_fp16, var_4691_cast_fp16, var_4691_cast_fp16, var_4707_cast_fp16, var_4707_cast_fp16))[name = string("key_heads_47_cast_fp16")]; + bool value_heads_47_interleave_0 = const()[name = string("value_heads_47_interleave_0"), val = bool(false)]; + tensor value_heads_47_cast_fp16 = concat(axis = var_4437, interleave = value_heads_47_interleave_0, values = (var_4599_cast_fp16, var_4599_cast_fp16, var_4615_cast_fp16, var_4615_cast_fp16, var_4631_cast_fp16, var_4631_cast_fp16, var_4647_cast_fp16, var_4647_cast_fp16, var_4663_cast_fp16, var_4663_cast_fp16, var_4679_cast_fp16, var_4679_cast_fp16, var_4695_cast_fp16, var_4695_cast_fp16, var_4711_cast_fp16, var_4711_cast_fp16))[name = string("value_heads_47_cast_fp16")]; + fp16 var_4734_to_fp16 = const()[name = string("op_4734_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_4735_cast_fp16 = mul(x = mh_q_69_cast_fp16, y = var_4734_to_fp16)[name = string("op_4735_cast_fp16")]; + bool mh_w_45_transpose_x_0 = const()[name = string("mh_w_45_transpose_x_0"), val = bool(true)]; + bool mh_w_45_transpose_y_0 = const()[name = string("mh_w_45_transpose_y_0"), val = bool(false)]; + tensor mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_4735_cast_fp16, y = key_heads_47_cast_fp16)[name = string("mh_w_45_cast_fp16")]; + tensor mh_w_47_cast_fp16 = add(x = mh_w_45_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_47_cast_fp16")]; + tensor var_4747_cast_fp16 = softmax(axis = var_4419, x = mh_w_47_cast_fp16)[name = string("op_4747_cast_fp16")]; + bool attn_23_transpose_x_0 = const()[name = string("attn_23_transpose_x_0"), val = bool(false)]; + bool attn_23_transpose_y_0 = const()[name = string("attn_23_transpose_y_0"), val = bool(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = value_heads_47_cast_fp16, y = var_4747_cast_fp16)[name = string("attn_23_cast_fp16")]; + tensor var_4752 = const()[name = string("op_4752"), val = tensor([1, -1, 1, 1])]; + tensor input_89_cast_fp16 = reshape(shape = var_4752, x = attn_23_cast_fp16)[name = string("input_89_cast_fp16")]; + string obj_99_pad_type_0 = const()[name = string("obj_99_pad_type_0"), val = string("valid")]; + tensor obj_99_strides_0 = const()[name = string("obj_99_strides_0"), val = tensor([1, 1])]; + tensor obj_99_pad_0 = const()[name = string("obj_99_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_99_dilations_0 = const()[name = string("obj_99_dilations_0"), val = tensor([1, 1])]; + int32 obj_99_groups_0 = const()[name = string("obj_99_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177390336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179487552))))[name = string("layers_11_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_99_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_99_dilations_0, groups = obj_99_groups_0, pad = obj_99_pad_0, pad_type = obj_99_pad_type_0, strides = obj_99_strides_0, weight = layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("obj_99_cast_fp16")]; + tensor inputs_93_cast_fp16 = add(x = inputs_87_cast_fp16, y = obj_99_cast_fp16)[name = string("inputs_93_cast_fp16")]; + tensor inputs_sq_95_cast_fp16 = mul(x = inputs_93_cast_fp16, y = inputs_93_cast_fp16)[name = string("inputs_sq_95_cast_fp16")]; + tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([1])]; + bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; + tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = inputs_sq_95_cast_fp16)[name = string("variance_95_cast_fp16")]; + fp16 var_4770_to_fp16 = const()[name = string("op_4770_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4771_cast_fp16 = add(x = variance_95_cast_fp16, y = var_4770_to_fp16)[name = string("op_4771_cast_fp16")]; + fp32 var_4772_epsilon_0 = const()[name = string("op_4772_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4772_cast_fp16 = rsqrt(epsilon = var_4772_epsilon_0, x = var_4771_cast_fp16)[name = string("op_4772_cast_fp16")]; + tensor hidden_states_117_cast_fp16 = mul(x = inputs_93_cast_fp16, y = var_4772_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; + tensor w_95_to_fp16 = const()[name = string("w_95_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179488128)))]; + tensor input_91_cast_fp16 = mul(x = w_95_to_fp16, y = hidden_states_117_cast_fp16)[name = string("input_91_cast_fp16")]; + string input_93_pad_type_0 = const()[name = string("input_93_pad_type_0"), val = string("valid")]; + tensor input_93_strides_0 = const()[name = string("input_93_strides_0"), val = tensor([1, 1])]; + tensor input_93_pad_0 = const()[name = string("input_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_93_dilations_0 = const()[name = string("input_93_dilations_0"), val = tensor([1, 1])]; + int32 input_93_groups_0 = const()[name = string("input_93_groups_0"), val = int32(1)]; + tensor layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179490240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182636032))))[name = string("layers_11_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_93_cast_fp16 = conv(dilations = input_93_dilations_0, groups = input_93_groups_0, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = input_93_strides_0, weight = layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("input_93_cast_fp16")]; + tensor var_4786_cast_fp16 = silu(x = input_93_cast_fp16)[name = string("op_4786_cast_fp16")]; + string var_4792_pad_type_0 = const()[name = string("op_4792_pad_type_0"), val = string("valid")]; + tensor var_4792_strides_0 = const()[name = string("op_4792_strides_0"), val = tensor([1, 1])]; + tensor var_4792_pad_0 = const()[name = string("op_4792_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4792_dilations_0 = const()[name = string("op_4792_dilations_0"), val = tensor([1, 1])]; + int32 var_4792_groups_0 = const()[name = string("op_4792_groups_0"), val = int32(1)]; + tensor layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182636608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185782400))))[name = string("layers_11_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_4792_cast_fp16 = conv(dilations = var_4792_dilations_0, groups = var_4792_groups_0, pad = var_4792_pad_0, pad_type = var_4792_pad_type_0, strides = var_4792_strides_0, weight = layers_11_mlp_up_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_4792_cast_fp16")]; + tensor input_95_cast_fp16 = mul(x = var_4786_cast_fp16, y = var_4792_cast_fp16)[name = string("input_95_cast_fp16")]; + string hidden_states_119_pad_type_0 = const()[name = string("hidden_states_119_pad_type_0"), val = string("valid")]; + tensor hidden_states_119_strides_0 = const()[name = string("hidden_states_119_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_119_pad_0 = const()[name = string("hidden_states_119_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_119_dilations_0 = const()[name = string("hidden_states_119_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_119_groups_0 = const()[name = string("hidden_states_119_groups_0"), val = int32(1)]; + tensor layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185782976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188928768))))[name = string("layers_11_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_119_cast_fp16 = conv(dilations = hidden_states_119_dilations_0, groups = hidden_states_119_groups_0, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = hidden_states_119_strides_0, weight = layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("inputs_95_cast_fp16")]; + int32 var_4806 = const()[name = string("op_4806"), val = int32(3)]; + int32 var_4816 = const()[name = string("op_4816"), val = int32(-2)]; + int32 var_4824 = const()[name = string("op_4824"), val = int32(1)]; + tensor inputs_sq_97_cast_fp16 = mul(x = inputs_95_cast_fp16, y = inputs_95_cast_fp16)[name = string("inputs_sq_97_cast_fp16")]; + tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([1])]; + bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; + tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = inputs_sq_97_cast_fp16)[name = string("variance_97_cast_fp16")]; + fp16 var_4836_to_fp16 = const()[name = string("op_4836_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4837_cast_fp16 = add(x = variance_97_cast_fp16, y = var_4836_to_fp16)[name = string("op_4837_cast_fp16")]; + fp32 var_4838_epsilon_0 = const()[name = string("op_4838_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4838_cast_fp16 = rsqrt(epsilon = var_4838_epsilon_0, x = var_4837_cast_fp16)[name = string("op_4838_cast_fp16")]; + tensor hidden_states_121_cast_fp16 = mul(x = inputs_95_cast_fp16, y = var_4838_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; + tensor w_97_to_fp16 = const()[name = string("w_97_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188929344)))]; + tensor obj_101_cast_fp16 = mul(x = w_97_to_fp16, y = hidden_states_121_cast_fp16)[name = string("obj_101_cast_fp16")]; + string query_73_pad_type_0 = const()[name = string("query_73_pad_type_0"), val = string("valid")]; + tensor query_73_strides_0 = const()[name = string("query_73_strides_0"), val = tensor([1, 1])]; + tensor query_73_pad_0 = const()[name = string("query_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_73_dilations_0 = const()[name = string("query_73_dilations_0"), val = tensor([1, 1])]; + int32 query_73_groups_0 = const()[name = string("query_73_groups_0"), val = int32(1)]; + tensor layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188931456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191028672))))[name = string("layers_12_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_73_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_73_dilations_0, groups = query_73_groups_0, pad = query_73_pad_0, pad_type = query_73_pad_type_0, strides = query_73_strides_0, weight = layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = string("query_73_cast_fp16")]; + string current_key_49_pad_type_0 = const()[name = string("current_key_49_pad_type_0"), val = string("valid")]; + tensor current_key_49_strides_0 = const()[name = string("current_key_49_strides_0"), val = tensor([1, 1])]; + tensor current_key_49_pad_0 = const()[name = string("current_key_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_49_dilations_0 = const()[name = string("current_key_49_dilations_0"), val = tensor([1, 1])]; + int32 current_key_49_groups_0 = const()[name = string("current_key_49_groups_0"), val = int32(1)]; + tensor layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191029248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192077888))))[name = string("layers_12_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_49_cast_fp16 = conv(dilations = current_key_49_dilations_0, groups = current_key_49_groups_0, pad = current_key_49_pad_0, pad_type = current_key_49_pad_type_0, strides = current_key_49_strides_0, weight = layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = string("current_key_49_cast_fp16")]; + string current_value_25_pad_type_0 = const()[name = string("current_value_25_pad_type_0"), val = string("valid")]; + tensor current_value_25_strides_0 = const()[name = string("current_value_25_strides_0"), val = tensor([1, 1])]; + tensor current_value_25_pad_0 = const()[name = string("current_value_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_25_dilations_0 = const()[name = string("current_value_25_dilations_0"), val = tensor([1, 1])]; + int32 current_value_25_groups_0 = const()[name = string("current_value_25_groups_0"), val = int32(1)]; + tensor layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192078464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193127104))))[name = string("layers_12_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_25_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_25_dilations_0, groups = current_value_25_groups_0, pad = current_value_25_pad_0, pad_type = current_value_25_pad_type_0, strides = current_value_25_strides_0, weight = layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = string("current_value_25_cast_fp16")]; + tensor var_4875 = const()[name = string("op_4875"), val = tensor([16, 128, 1, 1])]; + tensor inputs_97_cast_fp16 = reshape(shape = var_4875, x = query_73_cast_fp16)[name = string("inputs_97_cast_fp16")]; + tensor inputs_sq_99_cast_fp16 = mul(x = inputs_97_cast_fp16, y = inputs_97_cast_fp16)[name = string("inputs_sq_99_cast_fp16")]; + tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([1])]; + bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; + tensor variance_99_cast_fp16 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = inputs_sq_99_cast_fp16)[name = string("variance_99_cast_fp16")]; + fp16 var_4881_to_fp16 = const()[name = string("op_4881_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4882_cast_fp16 = add(x = variance_99_cast_fp16, y = var_4881_to_fp16)[name = string("op_4882_cast_fp16")]; + fp32 var_4883_epsilon_0 = const()[name = string("op_4883_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4883_cast_fp16 = rsqrt(epsilon = var_4883_epsilon_0, x = var_4882_cast_fp16)[name = string("op_4883_cast_fp16")]; + tensor hidden_states_123_cast_fp16 = mul(x = inputs_97_cast_fp16, y = var_4883_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor w_99_to_fp16 = const()[name = string("w_99_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193127680)))]; + tensor query_normed_25_cast_fp16 = mul(x = w_99_to_fp16, y = hidden_states_123_cast_fp16)[name = string("query_normed_25_cast_fp16")]; + tensor var_4891 = const()[name = string("op_4891"), val = tensor([8, 128, 1, 1])]; + tensor inputs_99_cast_fp16 = reshape(shape = var_4891, x = current_key_49_cast_fp16)[name = string("inputs_99_cast_fp16")]; + tensor inputs_sq_101_cast_fp16 = mul(x = inputs_99_cast_fp16, y = inputs_99_cast_fp16)[name = string("inputs_sq_101_cast_fp16")]; + tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([1])]; + bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; + tensor variance_101_cast_fp16 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = inputs_sq_101_cast_fp16)[name = string("variance_101_cast_fp16")]; + fp16 var_4897_to_fp16 = const()[name = string("op_4897_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4898_cast_fp16 = add(x = variance_101_cast_fp16, y = var_4897_to_fp16)[name = string("op_4898_cast_fp16")]; + fp32 var_4899_epsilon_0 = const()[name = string("op_4899_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4899_cast_fp16 = rsqrt(epsilon = var_4899_epsilon_0, x = var_4898_cast_fp16)[name = string("op_4899_cast_fp16")]; + tensor hidden_states_125_cast_fp16 = mul(x = inputs_99_cast_fp16, y = var_4899_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; + tensor w_101_to_fp16 = const()[name = string("w_101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193128000)))]; + tensor current_key_normed_25_cast_fp16 = mul(x = w_101_to_fp16, y = hidden_states_125_cast_fp16)[name = string("current_key_normed_25_cast_fp16")]; + tensor var_4917 = const()[name = string("op_4917"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_73_cast_fp16 = reshape(shape = var_4917, x = query_normed_25_cast_fp16)[name = string("mh_q_73_cast_fp16")]; + tensor var_4919 = const()[name = string("op_4919"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_49_cast_fp16 = reshape(shape = var_4919, x = current_key_normed_25_cast_fp16)[name = string("mh_k_49_cast_fp16")]; + tensor var_4923_cast_fp16 = mul(x = mh_q_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4923_cast_fp16")]; + tensor var_4928_begin_0 = const()[name = string("op_4928_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4928_end_0 = const()[name = string("op_4928_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_4928_end_mask_0 = const()[name = string("op_4928_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4928_cast_fp16 = slice_by_index(begin = var_4928_begin_0, end = var_4928_end_0, end_mask = var_4928_end_mask_0, x = mh_q_73_cast_fp16)[name = string("op_4928_cast_fp16")]; + tensor var_4934_begin_0 = const()[name = string("op_4934_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4934_end_0 = const()[name = string("op_4934_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_4934_end_mask_0 = const()[name = string("op_4934_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4934_cast_fp16 = slice_by_index(begin = var_4934_begin_0, end = var_4934_end_0, end_mask = var_4934_end_mask_0, x = mh_q_73_cast_fp16)[name = string("op_4934_cast_fp16")]; + fp16 const_293_promoted_to_fp16 = const()[name = string("const_293_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4936_cast_fp16 = mul(x = var_4934_cast_fp16, y = const_293_promoted_to_fp16)[name = string("op_4936_cast_fp16")]; + bool var_4938_interleave_0 = const()[name = string("op_4938_interleave_0"), val = bool(false)]; + tensor var_4938_cast_fp16 = concat(axis = var_4816, interleave = var_4938_interleave_0, values = (var_4936_cast_fp16, var_4928_cast_fp16))[name = string("op_4938_cast_fp16")]; + tensor var_4939_cast_fp16 = mul(x = var_4938_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4939_cast_fp16")]; + tensor mh_q_75_cast_fp16 = add(x = var_4923_cast_fp16, y = var_4939_cast_fp16)[name = string("mh_q_75_cast_fp16")]; + tensor var_4941_cast_fp16 = mul(x = mh_k_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4941_cast_fp16")]; + tensor var_4946_begin_0 = const()[name = string("op_4946_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4946_end_0 = const()[name = string("op_4946_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_4946_end_mask_0 = const()[name = string("op_4946_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4946_cast_fp16 = slice_by_index(begin = var_4946_begin_0, end = var_4946_end_0, end_mask = var_4946_end_mask_0, x = mh_k_49_cast_fp16)[name = string("op_4946_cast_fp16")]; + tensor var_4952_begin_0 = const()[name = string("op_4952_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_4952_end_0 = const()[name = string("op_4952_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_4952_end_mask_0 = const()[name = string("op_4952_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4952_cast_fp16 = slice_by_index(begin = var_4952_begin_0, end = var_4952_end_0, end_mask = var_4952_end_mask_0, x = mh_k_49_cast_fp16)[name = string("op_4952_cast_fp16")]; + fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4954_cast_fp16 = mul(x = var_4952_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_4954_cast_fp16")]; + bool var_4956_interleave_0 = const()[name = string("op_4956_interleave_0"), val = bool(false)]; + tensor var_4956_cast_fp16 = concat(axis = var_4816, interleave = var_4956_interleave_0, values = (var_4954_cast_fp16, var_4946_cast_fp16))[name = string("op_4956_cast_fp16")]; + tensor var_4957_cast_fp16 = mul(x = var_4956_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4957_cast_fp16")]; + tensor mh_k_51_cast_fp16 = add(x = var_4941_cast_fp16, y = var_4957_cast_fp16)[name = string("mh_k_51_cast_fp16")]; + tensor var_4961 = const()[name = string("op_4961"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_51_cast_fp16 = reshape(shape = var_4961, x = mh_k_51_cast_fp16)[name = string("current_key_51_cast_fp16")]; + tensor var_4968_cast_fp16 = mul(x = var_101_cast_fp16_12, y = var_323_cast_fp16)[name = string("op_4968_cast_fp16")]; + tensor var_4969_cast_fp16 = mul(x = current_key_51_cast_fp16, y = var_321_cast_fp16)[name = string("op_4969_cast_fp16")]; + tensor key_75_cast_fp16 = add(x = var_4968_cast_fp16, y = var_4969_cast_fp16)[name = string("key_75_cast_fp16")]; + tensor var_4972_cast_fp16 = mul(x = var_132_cast_fp16_12, y = var_323_cast_fp16)[name = string("op_4972_cast_fp16")]; + tensor var_4973_cast_fp16 = mul(x = current_value_25_cast_fp16, y = var_321_cast_fp16)[name = string("op_4973_cast_fp16")]; + tensor value_49_cast_fp16 = add(x = var_4972_cast_fp16, y = var_4973_cast_fp16)[name = string("value_49_cast_fp16")]; + tensor var_4977 = const()[name = string("op_4977"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_49_cast_fp16 = reshape(shape = var_4977, x = key_75_cast_fp16)[name = string("key_heads_49_cast_fp16")]; + tensor var_4979 = const()[name = string("op_4979"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_49_cast_fp16 = reshape(shape = var_4979, x = value_49_cast_fp16)[name = string("value_heads_49_cast_fp16")]; + tensor var_4982_begin_0 = const()[name = string("op_4982_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4982_end_0 = const()[name = string("op_4982_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4982_end_mask_0 = const()[name = string("op_4982_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4982_cast_fp16 = slice_by_index(begin = var_4982_begin_0, end = var_4982_end_0, end_mask = var_4982_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_4982_cast_fp16")]; + tensor var_4986_begin_0 = const()[name = string("op_4986_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4986_end_0 = const()[name = string("op_4986_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_4986_end_mask_0 = const()[name = string("op_4986_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4986_cast_fp16 = slice_by_index(begin = var_4986_begin_0, end = var_4986_end_0, end_mask = var_4986_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_4986_cast_fp16")]; + tensor var_4998_begin_0 = const()[name = string("op_4998_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_4998_end_0 = const()[name = string("op_4998_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_4998_end_mask_0 = const()[name = string("op_4998_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4998_cast_fp16 = slice_by_index(begin = var_4998_begin_0, end = var_4998_end_0, end_mask = var_4998_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_4998_cast_fp16")]; + tensor var_5002_begin_0 = const()[name = string("op_5002_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_5002_end_0 = const()[name = string("op_5002_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_5002_end_mask_0 = const()[name = string("op_5002_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5002_cast_fp16 = slice_by_index(begin = var_5002_begin_0, end = var_5002_end_0, end_mask = var_5002_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5002_cast_fp16")]; + tensor var_5014_begin_0 = const()[name = string("op_5014_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_5014_end_0 = const()[name = string("op_5014_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_5014_end_mask_0 = const()[name = string("op_5014_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5014_cast_fp16 = slice_by_index(begin = var_5014_begin_0, end = var_5014_end_0, end_mask = var_5014_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_5014_cast_fp16")]; + tensor var_5018_begin_0 = const()[name = string("op_5018_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_5018_end_0 = const()[name = string("op_5018_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_5018_end_mask_0 = const()[name = string("op_5018_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5018_cast_fp16 = slice_by_index(begin = var_5018_begin_0, end = var_5018_end_0, end_mask = var_5018_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5018_cast_fp16")]; + tensor var_5030_begin_0 = const()[name = string("op_5030_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_5030_end_0 = const()[name = string("op_5030_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_5030_end_mask_0 = const()[name = string("op_5030_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5030_cast_fp16 = slice_by_index(begin = var_5030_begin_0, end = var_5030_end_0, end_mask = var_5030_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_5030_cast_fp16")]; + tensor var_5034_begin_0 = const()[name = string("op_5034_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_5034_end_0 = const()[name = string("op_5034_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_5034_end_mask_0 = const()[name = string("op_5034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5034_cast_fp16 = slice_by_index(begin = var_5034_begin_0, end = var_5034_end_0, end_mask = var_5034_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5034_cast_fp16")]; + tensor var_5046_begin_0 = const()[name = string("op_5046_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5046_end_0 = const()[name = string("op_5046_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_5046_end_mask_0 = const()[name = string("op_5046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5046_cast_fp16 = slice_by_index(begin = var_5046_begin_0, end = var_5046_end_0, end_mask = var_5046_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_5046_cast_fp16")]; + tensor var_5050_begin_0 = const()[name = string("op_5050_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5050_end_0 = const()[name = string("op_5050_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_5050_end_mask_0 = const()[name = string("op_5050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5050_cast_fp16 = slice_by_index(begin = var_5050_begin_0, end = var_5050_end_0, end_mask = var_5050_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5050_cast_fp16")]; + tensor var_5062_begin_0 = const()[name = string("op_5062_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_5062_end_0 = const()[name = string("op_5062_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_5062_end_mask_0 = const()[name = string("op_5062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5062_cast_fp16 = slice_by_index(begin = var_5062_begin_0, end = var_5062_end_0, end_mask = var_5062_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_5062_cast_fp16")]; + tensor var_5066_begin_0 = const()[name = string("op_5066_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_5066_end_0 = const()[name = string("op_5066_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_5066_end_mask_0 = const()[name = string("op_5066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5066_cast_fp16 = slice_by_index(begin = var_5066_begin_0, end = var_5066_end_0, end_mask = var_5066_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5066_cast_fp16")]; + tensor var_5078_begin_0 = const()[name = string("op_5078_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_5078_end_0 = const()[name = string("op_5078_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_5078_end_mask_0 = const()[name = string("op_5078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5078_cast_fp16 = slice_by_index(begin = var_5078_begin_0, end = var_5078_end_0, end_mask = var_5078_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_5078_cast_fp16")]; + tensor var_5082_begin_0 = const()[name = string("op_5082_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_5082_end_0 = const()[name = string("op_5082_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_5082_end_mask_0 = const()[name = string("op_5082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5082_cast_fp16")]; + tensor var_5094_begin_0 = const()[name = string("op_5094_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_5094_end_0 = const()[name = string("op_5094_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5094_end_mask_0 = const()[name = string("op_5094_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5094_cast_fp16 = slice_by_index(begin = var_5094_begin_0, end = var_5094_end_0, end_mask = var_5094_end_mask_0, x = key_heads_49_cast_fp16)[name = string("op_5094_cast_fp16")]; + tensor var_5098_begin_0 = const()[name = string("op_5098_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_5098_end_0 = const()[name = string("op_5098_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5098_end_mask_0 = const()[name = string("op_5098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5098_cast_fp16 = slice_by_index(begin = var_5098_begin_0, end = var_5098_end_0, end_mask = var_5098_end_mask_0, x = value_heads_49_cast_fp16)[name = string("op_5098_cast_fp16")]; + bool key_heads_51_interleave_0 = const()[name = string("key_heads_51_interleave_0"), val = bool(false)]; + tensor key_heads_51_cast_fp16 = concat(axis = var_4824, interleave = key_heads_51_interleave_0, values = (var_4982_cast_fp16, var_4982_cast_fp16, var_4998_cast_fp16, var_4998_cast_fp16, var_5014_cast_fp16, var_5014_cast_fp16, var_5030_cast_fp16, var_5030_cast_fp16, var_5046_cast_fp16, var_5046_cast_fp16, var_5062_cast_fp16, var_5062_cast_fp16, var_5078_cast_fp16, var_5078_cast_fp16, var_5094_cast_fp16, var_5094_cast_fp16))[name = string("key_heads_51_cast_fp16")]; + bool value_heads_51_interleave_0 = const()[name = string("value_heads_51_interleave_0"), val = bool(false)]; + tensor value_heads_51_cast_fp16 = concat(axis = var_4824, interleave = value_heads_51_interleave_0, values = (var_4986_cast_fp16, var_4986_cast_fp16, var_5002_cast_fp16, var_5002_cast_fp16, var_5018_cast_fp16, var_5018_cast_fp16, var_5034_cast_fp16, var_5034_cast_fp16, var_5050_cast_fp16, var_5050_cast_fp16, var_5066_cast_fp16, var_5066_cast_fp16, var_5082_cast_fp16, var_5082_cast_fp16, var_5098_cast_fp16, var_5098_cast_fp16))[name = string("value_heads_51_cast_fp16")]; + fp16 var_5121_to_fp16 = const()[name = string("op_5121_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_5122_cast_fp16 = mul(x = mh_q_75_cast_fp16, y = var_5121_to_fp16)[name = string("op_5122_cast_fp16")]; + bool mh_w_49_transpose_x_0 = const()[name = string("mh_w_49_transpose_x_0"), val = bool(true)]; + bool mh_w_49_transpose_y_0 = const()[name = string("mh_w_49_transpose_y_0"), val = bool(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_5122_cast_fp16, y = key_heads_51_cast_fp16)[name = string("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_51_cast_fp16")]; + tensor var_5134_cast_fp16 = softmax(axis = var_4806, x = mh_w_51_cast_fp16)[name = string("op_5134_cast_fp16")]; + bool attn_25_transpose_x_0 = const()[name = string("attn_25_transpose_x_0"), val = bool(false)]; + bool attn_25_transpose_y_0 = const()[name = string("attn_25_transpose_y_0"), val = bool(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = value_heads_51_cast_fp16, y = var_5134_cast_fp16)[name = string("attn_25_cast_fp16")]; + tensor var_5139 = const()[name = string("op_5139"), val = tensor([1, -1, 1, 1])]; + tensor input_97_cast_fp16 = reshape(shape = var_5139, x = attn_25_cast_fp16)[name = string("input_97_cast_fp16")]; + string obj_107_pad_type_0 = const()[name = string("obj_107_pad_type_0"), val = string("valid")]; + tensor obj_107_strides_0 = const()[name = string("obj_107_strides_0"), val = tensor([1, 1])]; + tensor obj_107_pad_0 = const()[name = string("obj_107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_107_dilations_0 = const()[name = string("obj_107_dilations_0"), val = tensor([1, 1])]; + int32 obj_107_groups_0 = const()[name = string("obj_107_groups_0"), val = int32(1)]; + tensor layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193128320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195225536))))[name = string("layers_12_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_107_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_107_dilations_0, groups = obj_107_groups_0, pad = obj_107_pad_0, pad_type = obj_107_pad_type_0, strides = obj_107_strides_0, weight = layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_97_cast_fp16)[name = string("obj_107_cast_fp16")]; + tensor inputs_101_cast_fp16 = add(x = inputs_95_cast_fp16, y = obj_107_cast_fp16)[name = string("inputs_101_cast_fp16")]; + tensor inputs_sq_103_cast_fp16 = mul(x = inputs_101_cast_fp16, y = inputs_101_cast_fp16)[name = string("inputs_sq_103_cast_fp16")]; + tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([1])]; + bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; + tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = inputs_sq_103_cast_fp16)[name = string("variance_103_cast_fp16")]; + fp16 var_5157_to_fp16 = const()[name = string("op_5157_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5158_cast_fp16 = add(x = variance_103_cast_fp16, y = var_5157_to_fp16)[name = string("op_5158_cast_fp16")]; + fp32 var_5159_epsilon_0 = const()[name = string("op_5159_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5159_cast_fp16 = rsqrt(epsilon = var_5159_epsilon_0, x = var_5158_cast_fp16)[name = string("op_5159_cast_fp16")]; + tensor hidden_states_127_cast_fp16 = mul(x = inputs_101_cast_fp16, y = var_5159_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + tensor w_103_to_fp16 = const()[name = string("w_103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195226112)))]; + tensor input_99_cast_fp16 = mul(x = w_103_to_fp16, y = hidden_states_127_cast_fp16)[name = string("input_99_cast_fp16")]; + string input_101_pad_type_0 = const()[name = string("input_101_pad_type_0"), val = string("valid")]; + tensor input_101_strides_0 = const()[name = string("input_101_strides_0"), val = tensor([1, 1])]; + tensor input_101_pad_0 = const()[name = string("input_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_101_dilations_0 = const()[name = string("input_101_dilations_0"), val = tensor([1, 1])]; + int32 input_101_groups_0 = const()[name = string("input_101_groups_0"), val = int32(1)]; + tensor layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195228224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198374016))))[name = string("layers_12_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_101_cast_fp16 = conv(dilations = input_101_dilations_0, groups = input_101_groups_0, pad = input_101_pad_0, pad_type = input_101_pad_type_0, strides = input_101_strides_0, weight = layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("input_101_cast_fp16")]; + tensor var_5173_cast_fp16 = silu(x = input_101_cast_fp16)[name = string("op_5173_cast_fp16")]; + string var_5179_pad_type_0 = const()[name = string("op_5179_pad_type_0"), val = string("valid")]; + tensor var_5179_strides_0 = const()[name = string("op_5179_strides_0"), val = tensor([1, 1])]; + tensor var_5179_pad_0 = const()[name = string("op_5179_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5179_dilations_0 = const()[name = string("op_5179_dilations_0"), val = tensor([1, 1])]; + int32 var_5179_groups_0 = const()[name = string("op_5179_groups_0"), val = int32(1)]; + tensor layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198374592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201520384))))[name = string("layers_12_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_5179_cast_fp16 = conv(dilations = var_5179_dilations_0, groups = var_5179_groups_0, pad = var_5179_pad_0, pad_type = var_5179_pad_type_0, strides = var_5179_strides_0, weight = layers_12_mlp_up_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("op_5179_cast_fp16")]; + tensor input_103_cast_fp16 = mul(x = var_5173_cast_fp16, y = var_5179_cast_fp16)[name = string("input_103_cast_fp16")]; + string hidden_states_129_pad_type_0 = const()[name = string("hidden_states_129_pad_type_0"), val = string("valid")]; + tensor hidden_states_129_strides_0 = const()[name = string("hidden_states_129_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_129_pad_0 = const()[name = string("hidden_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_129_dilations_0 = const()[name = string("hidden_states_129_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_129_groups_0 = const()[name = string("hidden_states_129_groups_0"), val = int32(1)]; + tensor layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201520960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204666752))))[name = string("layers_12_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_129_cast_fp16 = conv(dilations = hidden_states_129_dilations_0, groups = hidden_states_129_groups_0, pad = hidden_states_129_pad_0, pad_type = hidden_states_129_pad_type_0, strides = hidden_states_129_strides_0, weight = layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; + tensor inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("inputs_103_cast_fp16")]; + int32 var_5193 = const()[name = string("op_5193"), val = int32(3)]; + int32 var_5203 = const()[name = string("op_5203"), val = int32(-2)]; + int32 var_5211 = const()[name = string("op_5211"), val = int32(1)]; + tensor inputs_sq_105_cast_fp16 = mul(x = inputs_103_cast_fp16, y = inputs_103_cast_fp16)[name = string("inputs_sq_105_cast_fp16")]; + tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([1])]; + bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; + tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = inputs_sq_105_cast_fp16)[name = string("variance_105_cast_fp16")]; + fp16 var_5223_to_fp16 = const()[name = string("op_5223_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5224_cast_fp16 = add(x = variance_105_cast_fp16, y = var_5223_to_fp16)[name = string("op_5224_cast_fp16")]; + fp32 var_5225_epsilon_0 = const()[name = string("op_5225_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5225_cast_fp16 = rsqrt(epsilon = var_5225_epsilon_0, x = var_5224_cast_fp16)[name = string("op_5225_cast_fp16")]; + tensor hidden_states_131_cast_fp16 = mul(x = inputs_103_cast_fp16, y = var_5225_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor w_105_to_fp16 = const()[name = string("w_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204667328)))]; + tensor obj_109_cast_fp16 = mul(x = w_105_to_fp16, y = hidden_states_131_cast_fp16)[name = string("obj_109_cast_fp16")]; + string query_79_pad_type_0 = const()[name = string("query_79_pad_type_0"), val = string("valid")]; + tensor query_79_strides_0 = const()[name = string("query_79_strides_0"), val = tensor([1, 1])]; + tensor query_79_pad_0 = const()[name = string("query_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_79_dilations_0 = const()[name = string("query_79_dilations_0"), val = tensor([1, 1])]; + int32 query_79_groups_0 = const()[name = string("query_79_groups_0"), val = int32(1)]; + tensor layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204669440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206766656))))[name = string("layers_13_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_79_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_79_dilations_0, groups = query_79_groups_0, pad = query_79_pad_0, pad_type = query_79_pad_type_0, strides = query_79_strides_0, weight = layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = string("query_79_cast_fp16")]; + string current_key_53_pad_type_0 = const()[name = string("current_key_53_pad_type_0"), val = string("valid")]; + tensor current_key_53_strides_0 = const()[name = string("current_key_53_strides_0"), val = tensor([1, 1])]; + tensor current_key_53_pad_0 = const()[name = string("current_key_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_53_dilations_0 = const()[name = string("current_key_53_dilations_0"), val = tensor([1, 1])]; + int32 current_key_53_groups_0 = const()[name = string("current_key_53_groups_0"), val = int32(1)]; + tensor layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206767232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207815872))))[name = string("layers_13_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_53_cast_fp16 = conv(dilations = current_key_53_dilations_0, groups = current_key_53_groups_0, pad = current_key_53_pad_0, pad_type = current_key_53_pad_type_0, strides = current_key_53_strides_0, weight = layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = string("current_key_53_cast_fp16")]; + string current_value_27_pad_type_0 = const()[name = string("current_value_27_pad_type_0"), val = string("valid")]; + tensor current_value_27_strides_0 = const()[name = string("current_value_27_strides_0"), val = tensor([1, 1])]; + tensor current_value_27_pad_0 = const()[name = string("current_value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_27_dilations_0 = const()[name = string("current_value_27_dilations_0"), val = tensor([1, 1])]; + int32 current_value_27_groups_0 = const()[name = string("current_value_27_groups_0"), val = int32(1)]; + tensor layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207816448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208865088))))[name = string("layers_13_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_27_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_27_dilations_0, groups = current_value_27_groups_0, pad = current_value_27_pad_0, pad_type = current_value_27_pad_type_0, strides = current_value_27_strides_0, weight = layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = string("current_value_27_cast_fp16")]; + tensor var_5262 = const()[name = string("op_5262"), val = tensor([16, 128, 1, 1])]; + tensor inputs_105_cast_fp16 = reshape(shape = var_5262, x = query_79_cast_fp16)[name = string("inputs_105_cast_fp16")]; + tensor inputs_sq_107_cast_fp16 = mul(x = inputs_105_cast_fp16, y = inputs_105_cast_fp16)[name = string("inputs_sq_107_cast_fp16")]; + tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([1])]; + bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; + tensor variance_107_cast_fp16 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = inputs_sq_107_cast_fp16)[name = string("variance_107_cast_fp16")]; + fp16 var_5268_to_fp16 = const()[name = string("op_5268_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5269_cast_fp16 = add(x = variance_107_cast_fp16, y = var_5268_to_fp16)[name = string("op_5269_cast_fp16")]; + fp32 var_5270_epsilon_0 = const()[name = string("op_5270_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5270_cast_fp16 = rsqrt(epsilon = var_5270_epsilon_0, x = var_5269_cast_fp16)[name = string("op_5270_cast_fp16")]; + tensor hidden_states_133_cast_fp16 = mul(x = inputs_105_cast_fp16, y = var_5270_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + tensor w_107_to_fp16 = const()[name = string("w_107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208865664)))]; + tensor query_normed_27_cast_fp16 = mul(x = w_107_to_fp16, y = hidden_states_133_cast_fp16)[name = string("query_normed_27_cast_fp16")]; + tensor var_5278 = const()[name = string("op_5278"), val = tensor([8, 128, 1, 1])]; + tensor inputs_107_cast_fp16 = reshape(shape = var_5278, x = current_key_53_cast_fp16)[name = string("inputs_107_cast_fp16")]; + tensor inputs_sq_109_cast_fp16 = mul(x = inputs_107_cast_fp16, y = inputs_107_cast_fp16)[name = string("inputs_sq_109_cast_fp16")]; + tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([1])]; + bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; + tensor variance_109_cast_fp16 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = inputs_sq_109_cast_fp16)[name = string("variance_109_cast_fp16")]; + fp16 var_5284_to_fp16 = const()[name = string("op_5284_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5285_cast_fp16 = add(x = variance_109_cast_fp16, y = var_5284_to_fp16)[name = string("op_5285_cast_fp16")]; + fp32 var_5286_epsilon_0 = const()[name = string("op_5286_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5286_cast_fp16 = rsqrt(epsilon = var_5286_epsilon_0, x = var_5285_cast_fp16)[name = string("op_5286_cast_fp16")]; + tensor hidden_states_135_cast_fp16 = mul(x = inputs_107_cast_fp16, y = var_5286_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor w_109_to_fp16 = const()[name = string("w_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208865984)))]; + tensor current_key_normed_27_cast_fp16 = mul(x = w_109_to_fp16, y = hidden_states_135_cast_fp16)[name = string("current_key_normed_27_cast_fp16")]; + tensor var_5304 = const()[name = string("op_5304"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_79_cast_fp16 = reshape(shape = var_5304, x = query_normed_27_cast_fp16)[name = string("mh_q_79_cast_fp16")]; + tensor var_5306 = const()[name = string("op_5306"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_53_cast_fp16 = reshape(shape = var_5306, x = current_key_normed_27_cast_fp16)[name = string("mh_k_53_cast_fp16")]; + tensor var_5310_cast_fp16 = mul(x = mh_q_79_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5310_cast_fp16")]; + tensor var_5315_begin_0 = const()[name = string("op_5315_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5315_end_0 = const()[name = string("op_5315_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_5315_end_mask_0 = const()[name = string("op_5315_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5315_cast_fp16 = slice_by_index(begin = var_5315_begin_0, end = var_5315_end_0, end_mask = var_5315_end_mask_0, x = mh_q_79_cast_fp16)[name = string("op_5315_cast_fp16")]; + tensor var_5321_begin_0 = const()[name = string("op_5321_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_5321_end_0 = const()[name = string("op_5321_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_5321_end_mask_0 = const()[name = string("op_5321_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5321_cast_fp16 = slice_by_index(begin = var_5321_begin_0, end = var_5321_end_0, end_mask = var_5321_end_mask_0, x = mh_q_79_cast_fp16)[name = string("op_5321_cast_fp16")]; + fp16 const_316_promoted_to_fp16 = const()[name = string("const_316_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5323_cast_fp16 = mul(x = var_5321_cast_fp16, y = const_316_promoted_to_fp16)[name = string("op_5323_cast_fp16")]; + bool var_5325_interleave_0 = const()[name = string("op_5325_interleave_0"), val = bool(false)]; + tensor var_5325_cast_fp16 = concat(axis = var_5203, interleave = var_5325_interleave_0, values = (var_5323_cast_fp16, var_5315_cast_fp16))[name = string("op_5325_cast_fp16")]; + tensor var_5326_cast_fp16 = mul(x = var_5325_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5326_cast_fp16")]; + tensor mh_q_81_cast_fp16 = add(x = var_5310_cast_fp16, y = var_5326_cast_fp16)[name = string("mh_q_81_cast_fp16")]; + tensor var_5328_cast_fp16 = mul(x = mh_k_53_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5328_cast_fp16")]; + tensor var_5333_begin_0 = const()[name = string("op_5333_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5333_end_0 = const()[name = string("op_5333_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_5333_end_mask_0 = const()[name = string("op_5333_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5333_cast_fp16 = slice_by_index(begin = var_5333_begin_0, end = var_5333_end_0, end_mask = var_5333_end_mask_0, x = mh_k_53_cast_fp16)[name = string("op_5333_cast_fp16")]; + tensor var_5339_begin_0 = const()[name = string("op_5339_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_5339_end_0 = const()[name = string("op_5339_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_5339_end_mask_0 = const()[name = string("op_5339_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5339_cast_fp16 = slice_by_index(begin = var_5339_begin_0, end = var_5339_end_0, end_mask = var_5339_end_mask_0, x = mh_k_53_cast_fp16)[name = string("op_5339_cast_fp16")]; + fp16 const_319_promoted_to_fp16 = const()[name = string("const_319_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5341_cast_fp16 = mul(x = var_5339_cast_fp16, y = const_319_promoted_to_fp16)[name = string("op_5341_cast_fp16")]; + bool var_5343_interleave_0 = const()[name = string("op_5343_interleave_0"), val = bool(false)]; + tensor var_5343_cast_fp16 = concat(axis = var_5203, interleave = var_5343_interleave_0, values = (var_5341_cast_fp16, var_5333_cast_fp16))[name = string("op_5343_cast_fp16")]; + tensor var_5344_cast_fp16 = mul(x = var_5343_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5344_cast_fp16")]; + tensor mh_k_55_cast_fp16 = add(x = var_5328_cast_fp16, y = var_5344_cast_fp16)[name = string("mh_k_55_cast_fp16")]; + tensor var_5348 = const()[name = string("op_5348"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_55_cast_fp16 = reshape(shape = var_5348, x = mh_k_55_cast_fp16)[name = string("current_key_55_cast_fp16")]; + tensor var_5355_cast_fp16 = mul(x = var_101_cast_fp16_13, y = var_323_cast_fp16)[name = string("op_5355_cast_fp16")]; + tensor var_5356_cast_fp16 = mul(x = current_key_55_cast_fp16, y = var_321_cast_fp16)[name = string("op_5356_cast_fp16")]; + tensor key_81_cast_fp16 = add(x = var_5355_cast_fp16, y = var_5356_cast_fp16)[name = string("key_81_cast_fp16")]; + tensor var_5359_cast_fp16 = mul(x = var_132_cast_fp16_13, y = var_323_cast_fp16)[name = string("op_5359_cast_fp16")]; + tensor var_5360_cast_fp16 = mul(x = current_value_27_cast_fp16, y = var_321_cast_fp16)[name = string("op_5360_cast_fp16")]; + tensor value_53_cast_fp16 = add(x = var_5359_cast_fp16, y = var_5360_cast_fp16)[name = string("value_53_cast_fp16")]; + tensor var_5364 = const()[name = string("op_5364"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_53_cast_fp16 = reshape(shape = var_5364, x = key_81_cast_fp16)[name = string("key_heads_53_cast_fp16")]; + tensor var_5366 = const()[name = string("op_5366"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_53_cast_fp16 = reshape(shape = var_5366, x = value_53_cast_fp16)[name = string("value_heads_53_cast_fp16")]; + tensor var_5369_begin_0 = const()[name = string("op_5369_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5369_end_0 = const()[name = string("op_5369_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5369_end_mask_0 = const()[name = string("op_5369_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5369_cast_fp16 = slice_by_index(begin = var_5369_begin_0, end = var_5369_end_0, end_mask = var_5369_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5369_cast_fp16")]; + tensor var_5373_begin_0 = const()[name = string("op_5373_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5373_end_0 = const()[name = string("op_5373_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5373_end_mask_0 = const()[name = string("op_5373_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5373_cast_fp16 = slice_by_index(begin = var_5373_begin_0, end = var_5373_end_0, end_mask = var_5373_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5373_cast_fp16")]; + tensor var_5385_begin_0 = const()[name = string("op_5385_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_5385_end_0 = const()[name = string("op_5385_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_5385_end_mask_0 = const()[name = string("op_5385_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5385_cast_fp16 = slice_by_index(begin = var_5385_begin_0, end = var_5385_end_0, end_mask = var_5385_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5385_cast_fp16")]; + tensor var_5389_begin_0 = const()[name = string("op_5389_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_5389_end_0 = const()[name = string("op_5389_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_5389_end_mask_0 = const()[name = string("op_5389_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5389_cast_fp16 = slice_by_index(begin = var_5389_begin_0, end = var_5389_end_0, end_mask = var_5389_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5389_cast_fp16")]; + tensor var_5401_begin_0 = const()[name = string("op_5401_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_5401_end_0 = const()[name = string("op_5401_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_5401_end_mask_0 = const()[name = string("op_5401_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5401_cast_fp16 = slice_by_index(begin = var_5401_begin_0, end = var_5401_end_0, end_mask = var_5401_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5401_cast_fp16")]; + tensor var_5405_begin_0 = const()[name = string("op_5405_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_5405_end_0 = const()[name = string("op_5405_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_5405_end_mask_0 = const()[name = string("op_5405_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5405_cast_fp16 = slice_by_index(begin = var_5405_begin_0, end = var_5405_end_0, end_mask = var_5405_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5405_cast_fp16")]; + tensor var_5417_begin_0 = const()[name = string("op_5417_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_5417_end_0 = const()[name = string("op_5417_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_5417_end_mask_0 = const()[name = string("op_5417_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5417_cast_fp16 = slice_by_index(begin = var_5417_begin_0, end = var_5417_end_0, end_mask = var_5417_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5417_cast_fp16")]; + tensor var_5421_begin_0 = const()[name = string("op_5421_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_5421_end_0 = const()[name = string("op_5421_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_5421_end_mask_0 = const()[name = string("op_5421_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5421_cast_fp16 = slice_by_index(begin = var_5421_begin_0, end = var_5421_end_0, end_mask = var_5421_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5421_cast_fp16")]; + tensor var_5433_begin_0 = const()[name = string("op_5433_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5433_end_0 = const()[name = string("op_5433_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_5433_end_mask_0 = const()[name = string("op_5433_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5433_cast_fp16 = slice_by_index(begin = var_5433_begin_0, end = var_5433_end_0, end_mask = var_5433_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5433_cast_fp16")]; + tensor var_5437_begin_0 = const()[name = string("op_5437_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5437_end_0 = const()[name = string("op_5437_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_5437_end_mask_0 = const()[name = string("op_5437_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5437_cast_fp16 = slice_by_index(begin = var_5437_begin_0, end = var_5437_end_0, end_mask = var_5437_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5437_cast_fp16")]; + tensor var_5449_begin_0 = const()[name = string("op_5449_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_5449_end_0 = const()[name = string("op_5449_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_5449_end_mask_0 = const()[name = string("op_5449_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5449_cast_fp16 = slice_by_index(begin = var_5449_begin_0, end = var_5449_end_0, end_mask = var_5449_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5449_cast_fp16")]; + tensor var_5453_begin_0 = const()[name = string("op_5453_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_5453_end_0 = const()[name = string("op_5453_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_5453_end_mask_0 = const()[name = string("op_5453_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5453_cast_fp16 = slice_by_index(begin = var_5453_begin_0, end = var_5453_end_0, end_mask = var_5453_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5453_cast_fp16")]; + tensor var_5465_begin_0 = const()[name = string("op_5465_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_5465_end_0 = const()[name = string("op_5465_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_5465_end_mask_0 = const()[name = string("op_5465_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5465_cast_fp16 = slice_by_index(begin = var_5465_begin_0, end = var_5465_end_0, end_mask = var_5465_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5465_cast_fp16")]; + tensor var_5469_begin_0 = const()[name = string("op_5469_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_5469_end_0 = const()[name = string("op_5469_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_5469_end_mask_0 = const()[name = string("op_5469_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5469_cast_fp16 = slice_by_index(begin = var_5469_begin_0, end = var_5469_end_0, end_mask = var_5469_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5469_cast_fp16")]; + tensor var_5481_begin_0 = const()[name = string("op_5481_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_5481_end_0 = const()[name = string("op_5481_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5481_end_mask_0 = const()[name = string("op_5481_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5481_cast_fp16 = slice_by_index(begin = var_5481_begin_0, end = var_5481_end_0, end_mask = var_5481_end_mask_0, x = key_heads_53_cast_fp16)[name = string("op_5481_cast_fp16")]; + tensor var_5485_begin_0 = const()[name = string("op_5485_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_5485_end_0 = const()[name = string("op_5485_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5485_end_mask_0 = const()[name = string("op_5485_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5485_cast_fp16 = slice_by_index(begin = var_5485_begin_0, end = var_5485_end_0, end_mask = var_5485_end_mask_0, x = value_heads_53_cast_fp16)[name = string("op_5485_cast_fp16")]; + bool key_heads_55_interleave_0 = const()[name = string("key_heads_55_interleave_0"), val = bool(false)]; + tensor key_heads_55_cast_fp16 = concat(axis = var_5211, interleave = key_heads_55_interleave_0, values = (var_5369_cast_fp16, var_5369_cast_fp16, var_5385_cast_fp16, var_5385_cast_fp16, var_5401_cast_fp16, var_5401_cast_fp16, var_5417_cast_fp16, var_5417_cast_fp16, var_5433_cast_fp16, var_5433_cast_fp16, var_5449_cast_fp16, var_5449_cast_fp16, var_5465_cast_fp16, var_5465_cast_fp16, var_5481_cast_fp16, var_5481_cast_fp16))[name = string("key_heads_55_cast_fp16")]; + bool value_heads_55_interleave_0 = const()[name = string("value_heads_55_interleave_0"), val = bool(false)]; + tensor value_heads_55_cast_fp16 = concat(axis = var_5211, interleave = value_heads_55_interleave_0, values = (var_5373_cast_fp16, var_5373_cast_fp16, var_5389_cast_fp16, var_5389_cast_fp16, var_5405_cast_fp16, var_5405_cast_fp16, var_5421_cast_fp16, var_5421_cast_fp16, var_5437_cast_fp16, var_5437_cast_fp16, var_5453_cast_fp16, var_5453_cast_fp16, var_5469_cast_fp16, var_5469_cast_fp16, var_5485_cast_fp16, var_5485_cast_fp16))[name = string("value_heads_55_cast_fp16")]; + fp16 var_5508_to_fp16 = const()[name = string("op_5508_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_5509_cast_fp16 = mul(x = mh_q_81_cast_fp16, y = var_5508_to_fp16)[name = string("op_5509_cast_fp16")]; + bool mh_w_53_transpose_x_0 = const()[name = string("mh_w_53_transpose_x_0"), val = bool(true)]; + bool mh_w_53_transpose_y_0 = const()[name = string("mh_w_53_transpose_y_0"), val = bool(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_5509_cast_fp16, y = key_heads_55_cast_fp16)[name = string("mh_w_53_cast_fp16")]; + tensor mh_w_55_cast_fp16 = add(x = mh_w_53_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_55_cast_fp16")]; + tensor var_5521_cast_fp16 = softmax(axis = var_5193, x = mh_w_55_cast_fp16)[name = string("op_5521_cast_fp16")]; + bool attn_27_transpose_x_0 = const()[name = string("attn_27_transpose_x_0"), val = bool(false)]; + bool attn_27_transpose_y_0 = const()[name = string("attn_27_transpose_y_0"), val = bool(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = value_heads_55_cast_fp16, y = var_5521_cast_fp16)[name = string("attn_27_cast_fp16")]; + tensor var_5526 = const()[name = string("op_5526"), val = tensor([1, -1, 1, 1])]; + tensor input_105_cast_fp16 = reshape(shape = var_5526, x = attn_27_cast_fp16)[name = string("input_105_cast_fp16")]; + string obj_115_pad_type_0 = const()[name = string("obj_115_pad_type_0"), val = string("valid")]; + tensor obj_115_strides_0 = const()[name = string("obj_115_strides_0"), val = tensor([1, 1])]; + tensor obj_115_pad_0 = const()[name = string("obj_115_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_115_dilations_0 = const()[name = string("obj_115_dilations_0"), val = tensor([1, 1])]; + int32 obj_115_groups_0 = const()[name = string("obj_115_groups_0"), val = int32(1)]; + tensor layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208866304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210963520))))[name = string("layers_13_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_115_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_115_dilations_0, groups = obj_115_groups_0, pad = obj_115_pad_0, pad_type = obj_115_pad_type_0, strides = obj_115_strides_0, weight = layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("obj_115_cast_fp16")]; + tensor inputs_109_cast_fp16 = add(x = inputs_103_cast_fp16, y = obj_115_cast_fp16)[name = string("inputs_109_cast_fp16")]; + tensor inputs_sq_111_cast_fp16 = mul(x = inputs_109_cast_fp16, y = inputs_109_cast_fp16)[name = string("inputs_sq_111_cast_fp16")]; + tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([1])]; + bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; + tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = inputs_sq_111_cast_fp16)[name = string("variance_111_cast_fp16")]; + fp16 var_5544_to_fp16 = const()[name = string("op_5544_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5545_cast_fp16 = add(x = variance_111_cast_fp16, y = var_5544_to_fp16)[name = string("op_5545_cast_fp16")]; + fp32 var_5546_epsilon_0 = const()[name = string("op_5546_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5546_cast_fp16 = rsqrt(epsilon = var_5546_epsilon_0, x = var_5545_cast_fp16)[name = string("op_5546_cast_fp16")]; + tensor hidden_states_137_cast_fp16 = mul(x = inputs_109_cast_fp16, y = var_5546_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; + tensor w_111_to_fp16 = const()[name = string("w_111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210964096)))]; + tensor input_107_cast_fp16 = mul(x = w_111_to_fp16, y = hidden_states_137_cast_fp16)[name = string("input_107_cast_fp16")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210966208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214112000))))[name = string("layers_13_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_109_cast_fp16 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")]; + tensor var_5560_cast_fp16 = silu(x = input_109_cast_fp16)[name = string("op_5560_cast_fp16")]; + string var_5566_pad_type_0 = const()[name = string("op_5566_pad_type_0"), val = string("valid")]; + tensor var_5566_strides_0 = const()[name = string("op_5566_strides_0"), val = tensor([1, 1])]; + tensor var_5566_pad_0 = const()[name = string("op_5566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5566_dilations_0 = const()[name = string("op_5566_dilations_0"), val = tensor([1, 1])]; + int32 var_5566_groups_0 = const()[name = string("op_5566_groups_0"), val = int32(1)]; + tensor layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214112576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217258368))))[name = string("layers_13_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_5566_cast_fp16 = conv(dilations = var_5566_dilations_0, groups = var_5566_groups_0, pad = var_5566_pad_0, pad_type = var_5566_pad_type_0, strides = var_5566_strides_0, weight = layers_13_mlp_up_proj_weight_to_fp16_palettized, x = input_107_cast_fp16)[name = string("op_5566_cast_fp16")]; + tensor input_111_cast_fp16 = mul(x = var_5560_cast_fp16, y = var_5566_cast_fp16)[name = string("input_111_cast_fp16")]; + string hidden_states_139_pad_type_0 = const()[name = string("hidden_states_139_pad_type_0"), val = string("valid")]; + tensor hidden_states_139_strides_0 = const()[name = string("hidden_states_139_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_139_pad_0 = const()[name = string("hidden_states_139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_139_dilations_0 = const()[name = string("hidden_states_139_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_139_groups_0 = const()[name = string("hidden_states_139_groups_0"), val = int32(1)]; + tensor layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217258944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220404736))))[name = string("layers_13_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_139_cast_fp16 = conv(dilations = hidden_states_139_dilations_0, groups = hidden_states_139_groups_0, pad = hidden_states_139_pad_0, pad_type = hidden_states_139_pad_type_0, strides = hidden_states_139_strides_0, weight = layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + tensor inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = hidden_states_139_cast_fp16)[name = string("inputs_111_cast_fp16")]; + int32 var_5580 = const()[name = string("op_5580"), val = int32(3)]; + int32 var_5590 = const()[name = string("op_5590"), val = int32(-2)]; + int32 var_5598 = const()[name = string("op_5598"), val = int32(1)]; + tensor inputs_sq_113_cast_fp16 = mul(x = inputs_111_cast_fp16, y = inputs_111_cast_fp16)[name = string("inputs_sq_113_cast_fp16")]; + tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([1])]; + bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; + tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = inputs_sq_113_cast_fp16)[name = string("variance_113_cast_fp16")]; + fp16 var_5610_to_fp16 = const()[name = string("op_5610_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5611_cast_fp16 = add(x = variance_113_cast_fp16, y = var_5610_to_fp16)[name = string("op_5611_cast_fp16")]; + fp32 var_5612_epsilon_0 = const()[name = string("op_5612_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5612_cast_fp16 = rsqrt(epsilon = var_5612_epsilon_0, x = var_5611_cast_fp16)[name = string("op_5612_cast_fp16")]; + tensor hidden_states_141_cast_fp16 = mul(x = inputs_111_cast_fp16, y = var_5612_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; + tensor w_113_to_fp16 = const()[name = string("w_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220405312)))]; + tensor obj_117_cast_fp16 = mul(x = w_113_to_fp16, y = hidden_states_141_cast_fp16)[name = string("obj_117_cast_fp16")]; + string query_85_pad_type_0 = const()[name = string("query_85_pad_type_0"), val = string("valid")]; + tensor query_85_strides_0 = const()[name = string("query_85_strides_0"), val = tensor([1, 1])]; + tensor query_85_pad_0 = const()[name = string("query_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_85_dilations_0 = const()[name = string("query_85_dilations_0"), val = tensor([1, 1])]; + int32 query_85_groups_0 = const()[name = string("query_85_groups_0"), val = int32(1)]; + tensor layers_14_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220407424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222504640))))[name = string("layers_14_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_85_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_85_dilations_0, groups = query_85_groups_0, pad = query_85_pad_0, pad_type = query_85_pad_type_0, strides = query_85_strides_0, weight = layers_14_self_attn_q_proj_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = string("query_85_cast_fp16")]; + string current_key_57_pad_type_0 = const()[name = string("current_key_57_pad_type_0"), val = string("valid")]; + tensor current_key_57_strides_0 = const()[name = string("current_key_57_strides_0"), val = tensor([1, 1])]; + tensor current_key_57_pad_0 = const()[name = string("current_key_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_57_dilations_0 = const()[name = string("current_key_57_dilations_0"), val = tensor([1, 1])]; + int32 current_key_57_groups_0 = const()[name = string("current_key_57_groups_0"), val = int32(1)]; + tensor layers_14_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222505216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223553856))))[name = string("layers_14_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_57_cast_fp16 = conv(dilations = current_key_57_dilations_0, groups = current_key_57_groups_0, pad = current_key_57_pad_0, pad_type = current_key_57_pad_type_0, strides = current_key_57_strides_0, weight = layers_14_self_attn_k_proj_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = string("current_key_57_cast_fp16")]; + string current_value_29_pad_type_0 = const()[name = string("current_value_29_pad_type_0"), val = string("valid")]; + tensor current_value_29_strides_0 = const()[name = string("current_value_29_strides_0"), val = tensor([1, 1])]; + tensor current_value_29_pad_0 = const()[name = string("current_value_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_29_dilations_0 = const()[name = string("current_value_29_dilations_0"), val = tensor([1, 1])]; + int32 current_value_29_groups_0 = const()[name = string("current_value_29_groups_0"), val = int32(1)]; + tensor layers_14_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223554432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224603072))))[name = string("layers_14_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_29_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_29_dilations_0, groups = current_value_29_groups_0, pad = current_value_29_pad_0, pad_type = current_value_29_pad_type_0, strides = current_value_29_strides_0, weight = layers_14_self_attn_v_proj_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = string("current_value_29_cast_fp16")]; + tensor var_5649 = const()[name = string("op_5649"), val = tensor([16, 128, 1, 1])]; + tensor inputs_113_cast_fp16 = reshape(shape = var_5649, x = query_85_cast_fp16)[name = string("inputs_113_cast_fp16")]; + tensor inputs_sq_115_cast_fp16 = mul(x = inputs_113_cast_fp16, y = inputs_113_cast_fp16)[name = string("inputs_sq_115_cast_fp16")]; + tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([1])]; + bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; + tensor variance_115_cast_fp16 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = inputs_sq_115_cast_fp16)[name = string("variance_115_cast_fp16")]; + fp16 var_5655_to_fp16 = const()[name = string("op_5655_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5656_cast_fp16 = add(x = variance_115_cast_fp16, y = var_5655_to_fp16)[name = string("op_5656_cast_fp16")]; + fp32 var_5657_epsilon_0 = const()[name = string("op_5657_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5657_cast_fp16 = rsqrt(epsilon = var_5657_epsilon_0, x = var_5656_cast_fp16)[name = string("op_5657_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = mul(x = inputs_113_cast_fp16, y = var_5657_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + tensor w_115_to_fp16 = const()[name = string("w_115_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224603648)))]; + tensor query_normed_29_cast_fp16 = mul(x = w_115_to_fp16, y = hidden_states_143_cast_fp16)[name = string("query_normed_29_cast_fp16")]; + tensor var_5665 = const()[name = string("op_5665"), val = tensor([8, 128, 1, 1])]; + tensor inputs_115_cast_fp16 = reshape(shape = var_5665, x = current_key_57_cast_fp16)[name = string("inputs_115_cast_fp16")]; + tensor inputs_sq_117_cast_fp16 = mul(x = inputs_115_cast_fp16, y = inputs_115_cast_fp16)[name = string("inputs_sq_117_cast_fp16")]; + tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([1])]; + bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; + tensor variance_117_cast_fp16 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = inputs_sq_117_cast_fp16)[name = string("variance_117_cast_fp16")]; + fp16 var_5671_to_fp16 = const()[name = string("op_5671_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5672_cast_fp16 = add(x = variance_117_cast_fp16, y = var_5671_to_fp16)[name = string("op_5672_cast_fp16")]; + fp32 var_5673_epsilon_0 = const()[name = string("op_5673_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5673_cast_fp16 = rsqrt(epsilon = var_5673_epsilon_0, x = var_5672_cast_fp16)[name = string("op_5673_cast_fp16")]; + tensor hidden_states_145_cast_fp16 = mul(x = inputs_115_cast_fp16, y = var_5673_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; + tensor w_117_to_fp16 = const()[name = string("w_117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224603968)))]; + tensor current_key_normed_29_cast_fp16 = mul(x = w_117_to_fp16, y = hidden_states_145_cast_fp16)[name = string("current_key_normed_29_cast_fp16")]; + tensor var_5691 = const()[name = string("op_5691"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_85_cast_fp16 = reshape(shape = var_5691, x = query_normed_29_cast_fp16)[name = string("mh_q_85_cast_fp16")]; + tensor var_5693 = const()[name = string("op_5693"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_57_cast_fp16 = reshape(shape = var_5693, x = current_key_normed_29_cast_fp16)[name = string("mh_k_57_cast_fp16")]; + tensor var_5697_cast_fp16 = mul(x = mh_q_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5697_cast_fp16")]; + tensor var_5702_begin_0 = const()[name = string("op_5702_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5702_end_0 = const()[name = string("op_5702_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_5702_end_mask_0 = const()[name = string("op_5702_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5702_cast_fp16 = slice_by_index(begin = var_5702_begin_0, end = var_5702_end_0, end_mask = var_5702_end_mask_0, x = mh_q_85_cast_fp16)[name = string("op_5702_cast_fp16")]; + tensor var_5708_begin_0 = const()[name = string("op_5708_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_5708_end_0 = const()[name = string("op_5708_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_5708_end_mask_0 = const()[name = string("op_5708_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5708_cast_fp16 = slice_by_index(begin = var_5708_begin_0, end = var_5708_end_0, end_mask = var_5708_end_mask_0, x = mh_q_85_cast_fp16)[name = string("op_5708_cast_fp16")]; + fp16 const_339_promoted_to_fp16 = const()[name = string("const_339_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5710_cast_fp16 = mul(x = var_5708_cast_fp16, y = const_339_promoted_to_fp16)[name = string("op_5710_cast_fp16")]; + bool var_5712_interleave_0 = const()[name = string("op_5712_interleave_0"), val = bool(false)]; + tensor var_5712_cast_fp16 = concat(axis = var_5590, interleave = var_5712_interleave_0, values = (var_5710_cast_fp16, var_5702_cast_fp16))[name = string("op_5712_cast_fp16")]; + tensor var_5713_cast_fp16 = mul(x = var_5712_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5713_cast_fp16")]; + tensor mh_q_87_cast_fp16 = add(x = var_5697_cast_fp16, y = var_5713_cast_fp16)[name = string("mh_q_87_cast_fp16")]; + tensor var_5715_cast_fp16 = mul(x = mh_k_57_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5715_cast_fp16")]; + tensor var_5720_begin_0 = const()[name = string("op_5720_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5720_end_0 = const()[name = string("op_5720_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_5720_end_mask_0 = const()[name = string("op_5720_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5720_cast_fp16 = slice_by_index(begin = var_5720_begin_0, end = var_5720_end_0, end_mask = var_5720_end_mask_0, x = mh_k_57_cast_fp16)[name = string("op_5720_cast_fp16")]; + tensor var_5726_begin_0 = const()[name = string("op_5726_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_5726_end_0 = const()[name = string("op_5726_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_5726_end_mask_0 = const()[name = string("op_5726_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5726_cast_fp16 = slice_by_index(begin = var_5726_begin_0, end = var_5726_end_0, end_mask = var_5726_end_mask_0, x = mh_k_57_cast_fp16)[name = string("op_5726_cast_fp16")]; + fp16 const_342_promoted_to_fp16 = const()[name = string("const_342_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5728_cast_fp16 = mul(x = var_5726_cast_fp16, y = const_342_promoted_to_fp16)[name = string("op_5728_cast_fp16")]; + bool var_5730_interleave_0 = const()[name = string("op_5730_interleave_0"), val = bool(false)]; + tensor var_5730_cast_fp16 = concat(axis = var_5590, interleave = var_5730_interleave_0, values = (var_5728_cast_fp16, var_5720_cast_fp16))[name = string("op_5730_cast_fp16")]; + tensor var_5731_cast_fp16 = mul(x = var_5730_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5731_cast_fp16")]; + tensor mh_k_59_cast_fp16 = add(x = var_5715_cast_fp16, y = var_5731_cast_fp16)[name = string("mh_k_59_cast_fp16")]; + tensor var_5735 = const()[name = string("op_5735"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_59_cast_fp16 = reshape(shape = var_5735, x = mh_k_59_cast_fp16)[name = string("current_key_59_cast_fp16")]; + tensor var_5742_cast_fp16 = mul(x = var_101_cast_fp16_14, y = var_323_cast_fp16)[name = string("op_5742_cast_fp16")]; + tensor var_5743_cast_fp16 = mul(x = current_key_59_cast_fp16, y = var_321_cast_fp16)[name = string("op_5743_cast_fp16")]; + tensor key_87_cast_fp16 = add(x = var_5742_cast_fp16, y = var_5743_cast_fp16)[name = string("key_87_cast_fp16")]; + tensor var_5746_cast_fp16 = mul(x = var_132_cast_fp16_14, y = var_323_cast_fp16)[name = string("op_5746_cast_fp16")]; + tensor var_5747_cast_fp16 = mul(x = current_value_29_cast_fp16, y = var_321_cast_fp16)[name = string("op_5747_cast_fp16")]; + tensor value_57_cast_fp16 = add(x = var_5746_cast_fp16, y = var_5747_cast_fp16)[name = string("value_57_cast_fp16")]; + tensor var_5751 = const()[name = string("op_5751"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_57_cast_fp16 = reshape(shape = var_5751, x = key_87_cast_fp16)[name = string("key_heads_57_cast_fp16")]; + tensor var_5753 = const()[name = string("op_5753"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_57_cast_fp16 = reshape(shape = var_5753, x = value_57_cast_fp16)[name = string("value_heads_57_cast_fp16")]; + tensor var_5756_begin_0 = const()[name = string("op_5756_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5756_end_0 = const()[name = string("op_5756_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5756_end_mask_0 = const()[name = string("op_5756_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5756_cast_fp16 = slice_by_index(begin = var_5756_begin_0, end = var_5756_end_0, end_mask = var_5756_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5756_cast_fp16")]; + tensor var_5760_begin_0 = const()[name = string("op_5760_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5760_end_0 = const()[name = string("op_5760_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5760_end_mask_0 = const()[name = string("op_5760_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5760_cast_fp16 = slice_by_index(begin = var_5760_begin_0, end = var_5760_end_0, end_mask = var_5760_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5760_cast_fp16")]; + tensor var_5772_begin_0 = const()[name = string("op_5772_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_5772_end_0 = const()[name = string("op_5772_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_5772_end_mask_0 = const()[name = string("op_5772_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5772_cast_fp16 = slice_by_index(begin = var_5772_begin_0, end = var_5772_end_0, end_mask = var_5772_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5772_cast_fp16")]; + tensor var_5776_begin_0 = const()[name = string("op_5776_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_5776_end_0 = const()[name = string("op_5776_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_5776_end_mask_0 = const()[name = string("op_5776_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5776_cast_fp16 = slice_by_index(begin = var_5776_begin_0, end = var_5776_end_0, end_mask = var_5776_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5776_cast_fp16")]; + tensor var_5788_begin_0 = const()[name = string("op_5788_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_5788_end_0 = const()[name = string("op_5788_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_5788_end_mask_0 = const()[name = string("op_5788_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5788_cast_fp16 = slice_by_index(begin = var_5788_begin_0, end = var_5788_end_0, end_mask = var_5788_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5788_cast_fp16")]; + tensor var_5792_begin_0 = const()[name = string("op_5792_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_5792_end_0 = const()[name = string("op_5792_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_5792_end_mask_0 = const()[name = string("op_5792_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5792_cast_fp16 = slice_by_index(begin = var_5792_begin_0, end = var_5792_end_0, end_mask = var_5792_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5792_cast_fp16")]; + tensor var_5804_begin_0 = const()[name = string("op_5804_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_5804_end_0 = const()[name = string("op_5804_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_5804_end_mask_0 = const()[name = string("op_5804_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5804_cast_fp16 = slice_by_index(begin = var_5804_begin_0, end = var_5804_end_0, end_mask = var_5804_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5804_cast_fp16")]; + tensor var_5808_begin_0 = const()[name = string("op_5808_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_5808_end_0 = const()[name = string("op_5808_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_5808_end_mask_0 = const()[name = string("op_5808_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5808_cast_fp16 = slice_by_index(begin = var_5808_begin_0, end = var_5808_end_0, end_mask = var_5808_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5808_cast_fp16")]; + tensor var_5820_begin_0 = const()[name = string("op_5820_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5820_end_0 = const()[name = string("op_5820_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_5820_end_mask_0 = const()[name = string("op_5820_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5820_cast_fp16 = slice_by_index(begin = var_5820_begin_0, end = var_5820_end_0, end_mask = var_5820_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5820_cast_fp16")]; + tensor var_5824_begin_0 = const()[name = string("op_5824_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5824_end_0 = const()[name = string("op_5824_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_5824_end_mask_0 = const()[name = string("op_5824_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5824_cast_fp16 = slice_by_index(begin = var_5824_begin_0, end = var_5824_end_0, end_mask = var_5824_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5824_cast_fp16")]; + tensor var_5836_begin_0 = const()[name = string("op_5836_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_5836_end_0 = const()[name = string("op_5836_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_5836_end_mask_0 = const()[name = string("op_5836_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5836_cast_fp16 = slice_by_index(begin = var_5836_begin_0, end = var_5836_end_0, end_mask = var_5836_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5836_cast_fp16")]; + tensor var_5840_begin_0 = const()[name = string("op_5840_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_5840_end_0 = const()[name = string("op_5840_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_5840_end_mask_0 = const()[name = string("op_5840_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5840_cast_fp16 = slice_by_index(begin = var_5840_begin_0, end = var_5840_end_0, end_mask = var_5840_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5840_cast_fp16")]; + tensor var_5852_begin_0 = const()[name = string("op_5852_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_5852_end_0 = const()[name = string("op_5852_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_5852_end_mask_0 = const()[name = string("op_5852_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5852_cast_fp16 = slice_by_index(begin = var_5852_begin_0, end = var_5852_end_0, end_mask = var_5852_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5852_cast_fp16")]; + tensor var_5856_begin_0 = const()[name = string("op_5856_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_5856_end_0 = const()[name = string("op_5856_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_5856_end_mask_0 = const()[name = string("op_5856_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5856_cast_fp16 = slice_by_index(begin = var_5856_begin_0, end = var_5856_end_0, end_mask = var_5856_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5856_cast_fp16")]; + tensor var_5868_begin_0 = const()[name = string("op_5868_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_5868_end_0 = const()[name = string("op_5868_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5868_end_mask_0 = const()[name = string("op_5868_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5868_cast_fp16 = slice_by_index(begin = var_5868_begin_0, end = var_5868_end_0, end_mask = var_5868_end_mask_0, x = key_heads_57_cast_fp16)[name = string("op_5868_cast_fp16")]; + tensor var_5872_begin_0 = const()[name = string("op_5872_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_5872_end_0 = const()[name = string("op_5872_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_5872_end_mask_0 = const()[name = string("op_5872_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5872_cast_fp16 = slice_by_index(begin = var_5872_begin_0, end = var_5872_end_0, end_mask = var_5872_end_mask_0, x = value_heads_57_cast_fp16)[name = string("op_5872_cast_fp16")]; + bool key_heads_59_interleave_0 = const()[name = string("key_heads_59_interleave_0"), val = bool(false)]; + tensor key_heads_59_cast_fp16 = concat(axis = var_5598, interleave = key_heads_59_interleave_0, values = (var_5756_cast_fp16, var_5756_cast_fp16, var_5772_cast_fp16, var_5772_cast_fp16, var_5788_cast_fp16, var_5788_cast_fp16, var_5804_cast_fp16, var_5804_cast_fp16, var_5820_cast_fp16, var_5820_cast_fp16, var_5836_cast_fp16, var_5836_cast_fp16, var_5852_cast_fp16, var_5852_cast_fp16, var_5868_cast_fp16, var_5868_cast_fp16))[name = string("key_heads_59_cast_fp16")]; + bool value_heads_59_interleave_0 = const()[name = string("value_heads_59_interleave_0"), val = bool(false)]; + tensor value_heads_59_cast_fp16 = concat(axis = var_5598, interleave = value_heads_59_interleave_0, values = (var_5760_cast_fp16, var_5760_cast_fp16, var_5776_cast_fp16, var_5776_cast_fp16, var_5792_cast_fp16, var_5792_cast_fp16, var_5808_cast_fp16, var_5808_cast_fp16, var_5824_cast_fp16, var_5824_cast_fp16, var_5840_cast_fp16, var_5840_cast_fp16, var_5856_cast_fp16, var_5856_cast_fp16, var_5872_cast_fp16, var_5872_cast_fp16))[name = string("value_heads_59_cast_fp16")]; + fp16 var_5895_to_fp16 = const()[name = string("op_5895_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_5896_cast_fp16 = mul(x = mh_q_87_cast_fp16, y = var_5895_to_fp16)[name = string("op_5896_cast_fp16")]; + bool mh_w_57_transpose_x_0 = const()[name = string("mh_w_57_transpose_x_0"), val = bool(true)]; + bool mh_w_57_transpose_y_0 = const()[name = string("mh_w_57_transpose_y_0"), val = bool(false)]; + tensor mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_5896_cast_fp16, y = key_heads_59_cast_fp16)[name = string("mh_w_57_cast_fp16")]; + tensor mh_w_59_cast_fp16 = add(x = mh_w_57_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_59_cast_fp16")]; + tensor var_5908_cast_fp16 = softmax(axis = var_5580, x = mh_w_59_cast_fp16)[name = string("op_5908_cast_fp16")]; + bool attn_29_transpose_x_0 = const()[name = string("attn_29_transpose_x_0"), val = bool(false)]; + bool attn_29_transpose_y_0 = const()[name = string("attn_29_transpose_y_0"), val = bool(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = value_heads_59_cast_fp16, y = var_5908_cast_fp16)[name = string("attn_29_cast_fp16")]; + tensor var_5913 = const()[name = string("op_5913"), val = tensor([1, -1, 1, 1])]; + tensor input_113_cast_fp16 = reshape(shape = var_5913, x = attn_29_cast_fp16)[name = string("input_113_cast_fp16")]; + string obj_123_pad_type_0 = const()[name = string("obj_123_pad_type_0"), val = string("valid")]; + tensor obj_123_strides_0 = const()[name = string("obj_123_strides_0"), val = tensor([1, 1])]; + tensor obj_123_pad_0 = const()[name = string("obj_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_123_dilations_0 = const()[name = string("obj_123_dilations_0"), val = tensor([1, 1])]; + int32 obj_123_groups_0 = const()[name = string("obj_123_groups_0"), val = int32(1)]; + tensor layers_14_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224604288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226701504))))[name = string("layers_14_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_123_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_123_dilations_0, groups = obj_123_groups_0, pad = obj_123_pad_0, pad_type = obj_123_pad_type_0, strides = obj_123_strides_0, weight = layers_14_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("obj_123_cast_fp16")]; + tensor inputs_117_cast_fp16 = add(x = inputs_111_cast_fp16, y = obj_123_cast_fp16)[name = string("inputs_117_cast_fp16")]; + tensor inputs_sq_119_cast_fp16 = mul(x = inputs_117_cast_fp16, y = inputs_117_cast_fp16)[name = string("inputs_sq_119_cast_fp16")]; + tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([1])]; + bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; + tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = inputs_sq_119_cast_fp16)[name = string("variance_119_cast_fp16")]; + fp16 var_5931_to_fp16 = const()[name = string("op_5931_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5932_cast_fp16 = add(x = variance_119_cast_fp16, y = var_5931_to_fp16)[name = string("op_5932_cast_fp16")]; + fp32 var_5933_epsilon_0 = const()[name = string("op_5933_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5933_cast_fp16 = rsqrt(epsilon = var_5933_epsilon_0, x = var_5932_cast_fp16)[name = string("op_5933_cast_fp16")]; + tensor hidden_states_147_cast_fp16 = mul(x = inputs_117_cast_fp16, y = var_5933_cast_fp16)[name = string("hidden_states_147_cast_fp16")]; + tensor w_119_to_fp16 = const()[name = string("w_119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226702080)))]; + tensor input_115_cast_fp16 = mul(x = w_119_to_fp16, y = hidden_states_147_cast_fp16)[name = string("input_115_cast_fp16")]; + string input_117_pad_type_0 = const()[name = string("input_117_pad_type_0"), val = string("valid")]; + tensor input_117_strides_0 = const()[name = string("input_117_strides_0"), val = tensor([1, 1])]; + tensor input_117_pad_0 = const()[name = string("input_117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_117_dilations_0 = const()[name = string("input_117_dilations_0"), val = tensor([1, 1])]; + int32 input_117_groups_0 = const()[name = string("input_117_groups_0"), val = int32(1)]; + tensor layers_14_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226704192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229849984))))[name = string("layers_14_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_117_cast_fp16 = conv(dilations = input_117_dilations_0, groups = input_117_groups_0, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = input_117_strides_0, weight = layers_14_mlp_gate_proj_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("input_117_cast_fp16")]; + tensor var_5947_cast_fp16 = silu(x = input_117_cast_fp16)[name = string("op_5947_cast_fp16")]; + string var_5953_pad_type_0 = const()[name = string("op_5953_pad_type_0"), val = string("valid")]; + tensor var_5953_strides_0 = const()[name = string("op_5953_strides_0"), val = tensor([1, 1])]; + tensor var_5953_pad_0 = const()[name = string("op_5953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5953_dilations_0 = const()[name = string("op_5953_dilations_0"), val = tensor([1, 1])]; + int32 var_5953_groups_0 = const()[name = string("op_5953_groups_0"), val = int32(1)]; + tensor layers_14_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229850560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232996352))))[name = string("layers_14_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_5953_cast_fp16 = conv(dilations = var_5953_dilations_0, groups = var_5953_groups_0, pad = var_5953_pad_0, pad_type = var_5953_pad_type_0, strides = var_5953_strides_0, weight = layers_14_mlp_up_proj_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("op_5953_cast_fp16")]; + tensor input_119_cast_fp16 = mul(x = var_5947_cast_fp16, y = var_5953_cast_fp16)[name = string("input_119_cast_fp16")]; + string hidden_states_149_pad_type_0 = const()[name = string("hidden_states_149_pad_type_0"), val = string("valid")]; + tensor hidden_states_149_strides_0 = const()[name = string("hidden_states_149_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_149_pad_0 = const()[name = string("hidden_states_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_149_dilations_0 = const()[name = string("hidden_states_149_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_149_groups_0 = const()[name = string("hidden_states_149_groups_0"), val = int32(1)]; + tensor layers_14_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232996928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236142720))))[name = string("layers_14_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_149_cast_fp16 = conv(dilations = hidden_states_149_dilations_0, groups = hidden_states_149_groups_0, pad = hidden_states_149_pad_0, pad_type = hidden_states_149_pad_type_0, strides = hidden_states_149_strides_0, weight = layers_14_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + tensor inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = hidden_states_149_cast_fp16)[name = string("inputs_119_cast_fp16")]; + int32 var_5967 = const()[name = string("op_5967"), val = int32(3)]; + int32 var_5977 = const()[name = string("op_5977"), val = int32(-2)]; + int32 var_5985 = const()[name = string("op_5985"), val = int32(1)]; + tensor inputs_sq_121_cast_fp16 = mul(x = inputs_119_cast_fp16, y = inputs_119_cast_fp16)[name = string("inputs_sq_121_cast_fp16")]; + tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([1])]; + bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; + tensor variance_121_cast_fp16 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = inputs_sq_121_cast_fp16)[name = string("variance_121_cast_fp16")]; + fp16 var_5997_to_fp16 = const()[name = string("op_5997_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5998_cast_fp16 = add(x = variance_121_cast_fp16, y = var_5997_to_fp16)[name = string("op_5998_cast_fp16")]; + fp32 var_5999_epsilon_0 = const()[name = string("op_5999_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5999_cast_fp16 = rsqrt(epsilon = var_5999_epsilon_0, x = var_5998_cast_fp16)[name = string("op_5999_cast_fp16")]; + tensor hidden_states_151_cast_fp16 = mul(x = inputs_119_cast_fp16, y = var_5999_cast_fp16)[name = string("hidden_states_151_cast_fp16")]; + tensor w_121_to_fp16 = const()[name = string("w_121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236143296)))]; + tensor obj_125_cast_fp16 = mul(x = w_121_to_fp16, y = hidden_states_151_cast_fp16)[name = string("obj_125_cast_fp16")]; + string query_91_pad_type_0 = const()[name = string("query_91_pad_type_0"), val = string("valid")]; + tensor query_91_strides_0 = const()[name = string("query_91_strides_0"), val = tensor([1, 1])]; + tensor query_91_pad_0 = const()[name = string("query_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_91_dilations_0 = const()[name = string("query_91_dilations_0"), val = tensor([1, 1])]; + int32 query_91_groups_0 = const()[name = string("query_91_groups_0"), val = int32(1)]; + tensor layers_15_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236145408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238242624))))[name = string("layers_15_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_91_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_91_dilations_0, groups = query_91_groups_0, pad = query_91_pad_0, pad_type = query_91_pad_type_0, strides = query_91_strides_0, weight = layers_15_self_attn_q_proj_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = string("query_91_cast_fp16")]; + string current_key_61_pad_type_0 = const()[name = string("current_key_61_pad_type_0"), val = string("valid")]; + tensor current_key_61_strides_0 = const()[name = string("current_key_61_strides_0"), val = tensor([1, 1])]; + tensor current_key_61_pad_0 = const()[name = string("current_key_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_61_dilations_0 = const()[name = string("current_key_61_dilations_0"), val = tensor([1, 1])]; + int32 current_key_61_groups_0 = const()[name = string("current_key_61_groups_0"), val = int32(1)]; + tensor layers_15_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238243200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239291840))))[name = string("layers_15_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_61_cast_fp16 = conv(dilations = current_key_61_dilations_0, groups = current_key_61_groups_0, pad = current_key_61_pad_0, pad_type = current_key_61_pad_type_0, strides = current_key_61_strides_0, weight = layers_15_self_attn_k_proj_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = string("current_key_61_cast_fp16")]; + string current_value_31_pad_type_0 = const()[name = string("current_value_31_pad_type_0"), val = string("valid")]; + tensor current_value_31_strides_0 = const()[name = string("current_value_31_strides_0"), val = tensor([1, 1])]; + tensor current_value_31_pad_0 = const()[name = string("current_value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_31_dilations_0 = const()[name = string("current_value_31_dilations_0"), val = tensor([1, 1])]; + int32 current_value_31_groups_0 = const()[name = string("current_value_31_groups_0"), val = int32(1)]; + tensor layers_15_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239292416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240341056))))[name = string("layers_15_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_31_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_31_dilations_0, groups = current_value_31_groups_0, pad = current_value_31_pad_0, pad_type = current_value_31_pad_type_0, strides = current_value_31_strides_0, weight = layers_15_self_attn_v_proj_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = string("current_value_31_cast_fp16")]; + tensor var_6036 = const()[name = string("op_6036"), val = tensor([16, 128, 1, 1])]; + tensor inputs_121_cast_fp16 = reshape(shape = var_6036, x = query_91_cast_fp16)[name = string("inputs_121_cast_fp16")]; + tensor inputs_sq_123_cast_fp16 = mul(x = inputs_121_cast_fp16, y = inputs_121_cast_fp16)[name = string("inputs_sq_123_cast_fp16")]; + tensor variance_123_axes_0 = const()[name = string("variance_123_axes_0"), val = tensor([1])]; + bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; + tensor variance_123_cast_fp16 = reduce_mean(axes = variance_123_axes_0, keep_dims = variance_123_keep_dims_0, x = inputs_sq_123_cast_fp16)[name = string("variance_123_cast_fp16")]; + fp16 var_6042_to_fp16 = const()[name = string("op_6042_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6043_cast_fp16 = add(x = variance_123_cast_fp16, y = var_6042_to_fp16)[name = string("op_6043_cast_fp16")]; + fp32 var_6044_epsilon_0 = const()[name = string("op_6044_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6044_cast_fp16 = rsqrt(epsilon = var_6044_epsilon_0, x = var_6043_cast_fp16)[name = string("op_6044_cast_fp16")]; + tensor hidden_states_153_cast_fp16 = mul(x = inputs_121_cast_fp16, y = var_6044_cast_fp16)[name = string("hidden_states_153_cast_fp16")]; + tensor w_123_to_fp16 = const()[name = string("w_123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240341632)))]; + tensor query_normed_31_cast_fp16 = mul(x = w_123_to_fp16, y = hidden_states_153_cast_fp16)[name = string("query_normed_31_cast_fp16")]; + tensor var_6052 = const()[name = string("op_6052"), val = tensor([8, 128, 1, 1])]; + tensor inputs_123_cast_fp16 = reshape(shape = var_6052, x = current_key_61_cast_fp16)[name = string("inputs_123_cast_fp16")]; + tensor inputs_sq_125_cast_fp16 = mul(x = inputs_123_cast_fp16, y = inputs_123_cast_fp16)[name = string("inputs_sq_125_cast_fp16")]; + tensor variance_125_axes_0 = const()[name = string("variance_125_axes_0"), val = tensor([1])]; + bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; + tensor variance_125_cast_fp16 = reduce_mean(axes = variance_125_axes_0, keep_dims = variance_125_keep_dims_0, x = inputs_sq_125_cast_fp16)[name = string("variance_125_cast_fp16")]; + fp16 var_6058_to_fp16 = const()[name = string("op_6058_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6059_cast_fp16 = add(x = variance_125_cast_fp16, y = var_6058_to_fp16)[name = string("op_6059_cast_fp16")]; + fp32 var_6060_epsilon_0 = const()[name = string("op_6060_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6060_cast_fp16 = rsqrt(epsilon = var_6060_epsilon_0, x = var_6059_cast_fp16)[name = string("op_6060_cast_fp16")]; + tensor hidden_states_155_cast_fp16 = mul(x = inputs_123_cast_fp16, y = var_6060_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; + tensor w_125_to_fp16 = const()[name = string("w_125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240341952)))]; + tensor current_key_normed_31_cast_fp16 = mul(x = w_125_to_fp16, y = hidden_states_155_cast_fp16)[name = string("current_key_normed_31_cast_fp16")]; + tensor var_6078 = const()[name = string("op_6078"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_91_cast_fp16 = reshape(shape = var_6078, x = query_normed_31_cast_fp16)[name = string("mh_q_91_cast_fp16")]; + tensor var_6080 = const()[name = string("op_6080"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_61_cast_fp16 = reshape(shape = var_6080, x = current_key_normed_31_cast_fp16)[name = string("mh_k_61_cast_fp16")]; + tensor var_6084_cast_fp16 = mul(x = mh_q_91_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6084_cast_fp16")]; + tensor var_6089_begin_0 = const()[name = string("op_6089_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6089_end_0 = const()[name = string("op_6089_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_6089_end_mask_0 = const()[name = string("op_6089_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_6089_cast_fp16 = slice_by_index(begin = var_6089_begin_0, end = var_6089_end_0, end_mask = var_6089_end_mask_0, x = mh_q_91_cast_fp16)[name = string("op_6089_cast_fp16")]; + tensor var_6095_begin_0 = const()[name = string("op_6095_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6095_end_0 = const()[name = string("op_6095_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_6095_end_mask_0 = const()[name = string("op_6095_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6095_cast_fp16 = slice_by_index(begin = var_6095_begin_0, end = var_6095_end_0, end_mask = var_6095_end_mask_0, x = mh_q_91_cast_fp16)[name = string("op_6095_cast_fp16")]; + fp16 const_362_promoted_to_fp16 = const()[name = string("const_362_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6097_cast_fp16 = mul(x = var_6095_cast_fp16, y = const_362_promoted_to_fp16)[name = string("op_6097_cast_fp16")]; + bool var_6099_interleave_0 = const()[name = string("op_6099_interleave_0"), val = bool(false)]; + tensor var_6099_cast_fp16 = concat(axis = var_5977, interleave = var_6099_interleave_0, values = (var_6097_cast_fp16, var_6089_cast_fp16))[name = string("op_6099_cast_fp16")]; + tensor var_6100_cast_fp16 = mul(x = var_6099_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6100_cast_fp16")]; + tensor mh_q_93_cast_fp16 = add(x = var_6084_cast_fp16, y = var_6100_cast_fp16)[name = string("mh_q_93_cast_fp16")]; + tensor var_6102_cast_fp16 = mul(x = mh_k_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6102_cast_fp16")]; + tensor var_6107_begin_0 = const()[name = string("op_6107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6107_end_0 = const()[name = string("op_6107_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_6107_end_mask_0 = const()[name = string("op_6107_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_6107_cast_fp16 = slice_by_index(begin = var_6107_begin_0, end = var_6107_end_0, end_mask = var_6107_end_mask_0, x = mh_k_61_cast_fp16)[name = string("op_6107_cast_fp16")]; + tensor var_6113_begin_0 = const()[name = string("op_6113_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6113_end_0 = const()[name = string("op_6113_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_6113_end_mask_0 = const()[name = string("op_6113_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6113_cast_fp16 = slice_by_index(begin = var_6113_begin_0, end = var_6113_end_0, end_mask = var_6113_end_mask_0, x = mh_k_61_cast_fp16)[name = string("op_6113_cast_fp16")]; + fp16 const_365_promoted_to_fp16 = const()[name = string("const_365_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6115_cast_fp16 = mul(x = var_6113_cast_fp16, y = const_365_promoted_to_fp16)[name = string("op_6115_cast_fp16")]; + bool var_6117_interleave_0 = const()[name = string("op_6117_interleave_0"), val = bool(false)]; + tensor var_6117_cast_fp16 = concat(axis = var_5977, interleave = var_6117_interleave_0, values = (var_6115_cast_fp16, var_6107_cast_fp16))[name = string("op_6117_cast_fp16")]; + tensor var_6118_cast_fp16 = mul(x = var_6117_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6118_cast_fp16")]; + tensor mh_k_63_cast_fp16 = add(x = var_6102_cast_fp16, y = var_6118_cast_fp16)[name = string("mh_k_63_cast_fp16")]; + tensor var_6122 = const()[name = string("op_6122"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_63_cast_fp16 = reshape(shape = var_6122, x = mh_k_63_cast_fp16)[name = string("current_key_63_cast_fp16")]; + tensor var_6129_cast_fp16 = mul(x = var_101_cast_fp16_15, y = var_323_cast_fp16)[name = string("op_6129_cast_fp16")]; + tensor var_6130_cast_fp16 = mul(x = current_key_63_cast_fp16, y = var_321_cast_fp16)[name = string("op_6130_cast_fp16")]; + tensor key_93_cast_fp16 = add(x = var_6129_cast_fp16, y = var_6130_cast_fp16)[name = string("key_93_cast_fp16")]; + tensor var_6133_cast_fp16 = mul(x = var_132_cast_fp16_15, y = var_323_cast_fp16)[name = string("op_6133_cast_fp16")]; + tensor var_6134_cast_fp16 = mul(x = current_value_31_cast_fp16, y = var_321_cast_fp16)[name = string("op_6134_cast_fp16")]; + tensor value_61_cast_fp16 = add(x = var_6133_cast_fp16, y = var_6134_cast_fp16)[name = string("value_61_cast_fp16")]; + tensor var_6138 = const()[name = string("op_6138"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_61_cast_fp16 = reshape(shape = var_6138, x = key_93_cast_fp16)[name = string("key_heads_61_cast_fp16")]; + tensor var_6140 = const()[name = string("op_6140"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_61_cast_fp16 = reshape(shape = var_6140, x = value_61_cast_fp16)[name = string("value_heads_61_cast_fp16")]; + tensor var_6143_begin_0 = const()[name = string("op_6143_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6143_end_0 = const()[name = string("op_6143_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6143_end_mask_0 = const()[name = string("op_6143_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6143_cast_fp16 = slice_by_index(begin = var_6143_begin_0, end = var_6143_end_0, end_mask = var_6143_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6143_cast_fp16")]; + tensor var_6147_begin_0 = const()[name = string("op_6147_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6147_end_0 = const()[name = string("op_6147_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6147_end_mask_0 = const()[name = string("op_6147_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = var_6147_end_0, end_mask = var_6147_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6147_cast_fp16")]; + tensor var_6159_begin_0 = const()[name = string("op_6159_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_6159_end_0 = const()[name = string("op_6159_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_6159_end_mask_0 = const()[name = string("op_6159_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6159_cast_fp16 = slice_by_index(begin = var_6159_begin_0, end = var_6159_end_0, end_mask = var_6159_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6159_cast_fp16")]; + tensor var_6163_begin_0 = const()[name = string("op_6163_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_6163_end_0 = const()[name = string("op_6163_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_6163_end_mask_0 = const()[name = string("op_6163_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6163_cast_fp16 = slice_by_index(begin = var_6163_begin_0, end = var_6163_end_0, end_mask = var_6163_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6163_cast_fp16")]; + tensor var_6175_begin_0 = const()[name = string("op_6175_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_6175_end_0 = const()[name = string("op_6175_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_6175_end_mask_0 = const()[name = string("op_6175_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = var_6175_end_0, end_mask = var_6175_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6175_cast_fp16")]; + tensor var_6179_begin_0 = const()[name = string("op_6179_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_6179_end_0 = const()[name = string("op_6179_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_6179_end_mask_0 = const()[name = string("op_6179_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6179_cast_fp16 = slice_by_index(begin = var_6179_begin_0, end = var_6179_end_0, end_mask = var_6179_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6179_cast_fp16")]; + tensor var_6191_begin_0 = const()[name = string("op_6191_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_6191_end_0 = const()[name = string("op_6191_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_6191_end_mask_0 = const()[name = string("op_6191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6191_cast_fp16 = slice_by_index(begin = var_6191_begin_0, end = var_6191_end_0, end_mask = var_6191_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6191_cast_fp16")]; + tensor var_6195_begin_0 = const()[name = string("op_6195_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_6195_end_0 = const()[name = string("op_6195_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_6195_end_mask_0 = const()[name = string("op_6195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6195_cast_fp16 = slice_by_index(begin = var_6195_begin_0, end = var_6195_end_0, end_mask = var_6195_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6195_cast_fp16")]; + tensor var_6207_begin_0 = const()[name = string("op_6207_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_6207_end_0 = const()[name = string("op_6207_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_6207_end_mask_0 = const()[name = string("op_6207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6207_cast_fp16 = slice_by_index(begin = var_6207_begin_0, end = var_6207_end_0, end_mask = var_6207_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6207_cast_fp16")]; + tensor var_6211_begin_0 = const()[name = string("op_6211_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_6211_end_0 = const()[name = string("op_6211_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_6211_end_mask_0 = const()[name = string("op_6211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6211_cast_fp16 = slice_by_index(begin = var_6211_begin_0, end = var_6211_end_0, end_mask = var_6211_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6211_cast_fp16")]; + tensor var_6223_begin_0 = const()[name = string("op_6223_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_6223_end_0 = const()[name = string("op_6223_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_6223_end_mask_0 = const()[name = string("op_6223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6223_cast_fp16 = slice_by_index(begin = var_6223_begin_0, end = var_6223_end_0, end_mask = var_6223_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6223_cast_fp16")]; + tensor var_6227_begin_0 = const()[name = string("op_6227_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_6227_end_0 = const()[name = string("op_6227_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_6227_end_mask_0 = const()[name = string("op_6227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6227_cast_fp16 = slice_by_index(begin = var_6227_begin_0, end = var_6227_end_0, end_mask = var_6227_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6227_cast_fp16")]; + tensor var_6239_begin_0 = const()[name = string("op_6239_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_6239_end_0 = const()[name = string("op_6239_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_6239_end_mask_0 = const()[name = string("op_6239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6239_cast_fp16 = slice_by_index(begin = var_6239_begin_0, end = var_6239_end_0, end_mask = var_6239_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6239_cast_fp16")]; + tensor var_6243_begin_0 = const()[name = string("op_6243_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_6243_end_0 = const()[name = string("op_6243_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_6243_end_mask_0 = const()[name = string("op_6243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6243_cast_fp16 = slice_by_index(begin = var_6243_begin_0, end = var_6243_end_0, end_mask = var_6243_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6243_cast_fp16")]; + tensor var_6255_begin_0 = const()[name = string("op_6255_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_6255_end_0 = const()[name = string("op_6255_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6255_end_mask_0 = const()[name = string("op_6255_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6255_cast_fp16 = slice_by_index(begin = var_6255_begin_0, end = var_6255_end_0, end_mask = var_6255_end_mask_0, x = key_heads_61_cast_fp16)[name = string("op_6255_cast_fp16")]; + tensor var_6259_begin_0 = const()[name = string("op_6259_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_6259_end_0 = const()[name = string("op_6259_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6259_end_mask_0 = const()[name = string("op_6259_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6259_cast_fp16 = slice_by_index(begin = var_6259_begin_0, end = var_6259_end_0, end_mask = var_6259_end_mask_0, x = value_heads_61_cast_fp16)[name = string("op_6259_cast_fp16")]; + bool key_heads_63_interleave_0 = const()[name = string("key_heads_63_interleave_0"), val = bool(false)]; + tensor key_heads_63_cast_fp16 = concat(axis = var_5985, interleave = key_heads_63_interleave_0, values = (var_6143_cast_fp16, var_6143_cast_fp16, var_6159_cast_fp16, var_6159_cast_fp16, var_6175_cast_fp16, var_6175_cast_fp16, var_6191_cast_fp16, var_6191_cast_fp16, var_6207_cast_fp16, var_6207_cast_fp16, var_6223_cast_fp16, var_6223_cast_fp16, var_6239_cast_fp16, var_6239_cast_fp16, var_6255_cast_fp16, var_6255_cast_fp16))[name = string("key_heads_63_cast_fp16")]; + bool value_heads_63_interleave_0 = const()[name = string("value_heads_63_interleave_0"), val = bool(false)]; + tensor value_heads_63_cast_fp16 = concat(axis = var_5985, interleave = value_heads_63_interleave_0, values = (var_6147_cast_fp16, var_6147_cast_fp16, var_6163_cast_fp16, var_6163_cast_fp16, var_6179_cast_fp16, var_6179_cast_fp16, var_6195_cast_fp16, var_6195_cast_fp16, var_6211_cast_fp16, var_6211_cast_fp16, var_6227_cast_fp16, var_6227_cast_fp16, var_6243_cast_fp16, var_6243_cast_fp16, var_6259_cast_fp16, var_6259_cast_fp16))[name = string("value_heads_63_cast_fp16")]; + fp16 var_6282_to_fp16 = const()[name = string("op_6282_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_6283_cast_fp16 = mul(x = mh_q_93_cast_fp16, y = var_6282_to_fp16)[name = string("op_6283_cast_fp16")]; + bool mh_w_61_transpose_x_0 = const()[name = string("mh_w_61_transpose_x_0"), val = bool(true)]; + bool mh_w_61_transpose_y_0 = const()[name = string("mh_w_61_transpose_y_0"), val = bool(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_6283_cast_fp16, y = key_heads_63_cast_fp16)[name = string("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_63_cast_fp16")]; + tensor var_6295_cast_fp16 = softmax(axis = var_5967, x = mh_w_63_cast_fp16)[name = string("op_6295_cast_fp16")]; + bool attn_31_transpose_x_0 = const()[name = string("attn_31_transpose_x_0"), val = bool(false)]; + bool attn_31_transpose_y_0 = const()[name = string("attn_31_transpose_y_0"), val = bool(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = value_heads_63_cast_fp16, y = var_6295_cast_fp16)[name = string("attn_31_cast_fp16")]; + tensor var_6300 = const()[name = string("op_6300"), val = tensor([1, -1, 1, 1])]; + tensor input_121_cast_fp16 = reshape(shape = var_6300, x = attn_31_cast_fp16)[name = string("input_121_cast_fp16")]; + string obj_131_pad_type_0 = const()[name = string("obj_131_pad_type_0"), val = string("valid")]; + tensor obj_131_strides_0 = const()[name = string("obj_131_strides_0"), val = tensor([1, 1])]; + tensor obj_131_pad_0 = const()[name = string("obj_131_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_131_dilations_0 = const()[name = string("obj_131_dilations_0"), val = tensor([1, 1])]; + int32 obj_131_groups_0 = const()[name = string("obj_131_groups_0"), val = int32(1)]; + tensor layers_15_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240342272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242439488))))[name = string("layers_15_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_131_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_131_dilations_0, groups = obj_131_groups_0, pad = obj_131_pad_0, pad_type = obj_131_pad_type_0, strides = obj_131_strides_0, weight = layers_15_self_attn_o_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("obj_131_cast_fp16")]; + tensor inputs_125_cast_fp16 = add(x = inputs_119_cast_fp16, y = obj_131_cast_fp16)[name = string("inputs_125_cast_fp16")]; + tensor inputs_sq_127_cast_fp16 = mul(x = inputs_125_cast_fp16, y = inputs_125_cast_fp16)[name = string("inputs_sq_127_cast_fp16")]; + tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([1])]; + bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; + tensor variance_127_cast_fp16 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = inputs_sq_127_cast_fp16)[name = string("variance_127_cast_fp16")]; + fp16 var_6318_to_fp16 = const()[name = string("op_6318_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6319_cast_fp16 = add(x = variance_127_cast_fp16, y = var_6318_to_fp16)[name = string("op_6319_cast_fp16")]; + fp32 var_6320_epsilon_0 = const()[name = string("op_6320_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6320_cast_fp16 = rsqrt(epsilon = var_6320_epsilon_0, x = var_6319_cast_fp16)[name = string("op_6320_cast_fp16")]; + tensor hidden_states_157_cast_fp16 = mul(x = inputs_125_cast_fp16, y = var_6320_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; + tensor w_127_to_fp16 = const()[name = string("w_127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242440064)))]; + tensor input_123_cast_fp16 = mul(x = w_127_to_fp16, y = hidden_states_157_cast_fp16)[name = string("input_123_cast_fp16")]; + string input_125_pad_type_0 = const()[name = string("input_125_pad_type_0"), val = string("valid")]; + tensor input_125_strides_0 = const()[name = string("input_125_strides_0"), val = tensor([1, 1])]; + tensor input_125_pad_0 = const()[name = string("input_125_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_125_dilations_0 = const()[name = string("input_125_dilations_0"), val = tensor([1, 1])]; + int32 input_125_groups_0 = const()[name = string("input_125_groups_0"), val = int32(1)]; + tensor layers_15_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242442176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245587968))))[name = string("layers_15_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_125_cast_fp16 = conv(dilations = input_125_dilations_0, groups = input_125_groups_0, pad = input_125_pad_0, pad_type = input_125_pad_type_0, strides = input_125_strides_0, weight = layers_15_mlp_gate_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("input_125_cast_fp16")]; + tensor var_6334_cast_fp16 = silu(x = input_125_cast_fp16)[name = string("op_6334_cast_fp16")]; + string var_6340_pad_type_0 = const()[name = string("op_6340_pad_type_0"), val = string("valid")]; + tensor var_6340_strides_0 = const()[name = string("op_6340_strides_0"), val = tensor([1, 1])]; + tensor var_6340_pad_0 = const()[name = string("op_6340_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6340_dilations_0 = const()[name = string("op_6340_dilations_0"), val = tensor([1, 1])]; + int32 var_6340_groups_0 = const()[name = string("op_6340_groups_0"), val = int32(1)]; + tensor layers_15_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245588544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248734336))))[name = string("layers_15_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_6340_cast_fp16 = conv(dilations = var_6340_dilations_0, groups = var_6340_groups_0, pad = var_6340_pad_0, pad_type = var_6340_pad_type_0, strides = var_6340_strides_0, weight = layers_15_mlp_up_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("op_6340_cast_fp16")]; + tensor input_127_cast_fp16 = mul(x = var_6334_cast_fp16, y = var_6340_cast_fp16)[name = string("input_127_cast_fp16")]; + string hidden_states_159_pad_type_0 = const()[name = string("hidden_states_159_pad_type_0"), val = string("valid")]; + tensor hidden_states_159_strides_0 = const()[name = string("hidden_states_159_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_159_pad_0 = const()[name = string("hidden_states_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_159_dilations_0 = const()[name = string("hidden_states_159_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_159_groups_0 = const()[name = string("hidden_states_159_groups_0"), val = int32(1)]; + tensor layers_15_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248734912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251880704))))[name = string("layers_15_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_159_cast_fp16 = conv(dilations = hidden_states_159_dilations_0, groups = hidden_states_159_groups_0, pad = hidden_states_159_pad_0, pad_type = hidden_states_159_pad_type_0, strides = hidden_states_159_strides_0, weight = layers_15_mlp_down_proj_weight_to_fp16_palettized, x = input_127_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + tensor inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = hidden_states_159_cast_fp16)[name = string("inputs_127_cast_fp16")]; + int32 var_6354 = const()[name = string("op_6354"), val = int32(3)]; + int32 var_6364 = const()[name = string("op_6364"), val = int32(-2)]; + int32 var_6372 = const()[name = string("op_6372"), val = int32(1)]; + tensor inputs_sq_129_cast_fp16 = mul(x = inputs_127_cast_fp16, y = inputs_127_cast_fp16)[name = string("inputs_sq_129_cast_fp16")]; + tensor variance_129_axes_0 = const()[name = string("variance_129_axes_0"), val = tensor([1])]; + bool variance_129_keep_dims_0 = const()[name = string("variance_129_keep_dims_0"), val = bool(true)]; + tensor variance_129_cast_fp16 = reduce_mean(axes = variance_129_axes_0, keep_dims = variance_129_keep_dims_0, x = inputs_sq_129_cast_fp16)[name = string("variance_129_cast_fp16")]; + fp16 var_6384_to_fp16 = const()[name = string("op_6384_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6385_cast_fp16 = add(x = variance_129_cast_fp16, y = var_6384_to_fp16)[name = string("op_6385_cast_fp16")]; + fp32 var_6386_epsilon_0 = const()[name = string("op_6386_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6386_cast_fp16 = rsqrt(epsilon = var_6386_epsilon_0, x = var_6385_cast_fp16)[name = string("op_6386_cast_fp16")]; + tensor hidden_states_161_cast_fp16 = mul(x = inputs_127_cast_fp16, y = var_6386_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; + tensor w_129_to_fp16 = const()[name = string("w_129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251881280)))]; + tensor obj_133_cast_fp16 = mul(x = w_129_to_fp16, y = hidden_states_161_cast_fp16)[name = string("obj_133_cast_fp16")]; + string query_97_pad_type_0 = const()[name = string("query_97_pad_type_0"), val = string("valid")]; + tensor query_97_strides_0 = const()[name = string("query_97_strides_0"), val = tensor([1, 1])]; + tensor query_97_pad_0 = const()[name = string("query_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_97_dilations_0 = const()[name = string("query_97_dilations_0"), val = tensor([1, 1])]; + int32 query_97_groups_0 = const()[name = string("query_97_groups_0"), val = int32(1)]; + tensor layers_16_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251883392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253980608))))[name = string("layers_16_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_97_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_97_dilations_0, groups = query_97_groups_0, pad = query_97_pad_0, pad_type = query_97_pad_type_0, strides = query_97_strides_0, weight = layers_16_self_attn_q_proj_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("query_97_cast_fp16")]; + string current_key_65_pad_type_0 = const()[name = string("current_key_65_pad_type_0"), val = string("valid")]; + tensor current_key_65_strides_0 = const()[name = string("current_key_65_strides_0"), val = tensor([1, 1])]; + tensor current_key_65_pad_0 = const()[name = string("current_key_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_65_dilations_0 = const()[name = string("current_key_65_dilations_0"), val = tensor([1, 1])]; + int32 current_key_65_groups_0 = const()[name = string("current_key_65_groups_0"), val = int32(1)]; + tensor layers_16_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253981184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255029824))))[name = string("layers_16_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_65_cast_fp16 = conv(dilations = current_key_65_dilations_0, groups = current_key_65_groups_0, pad = current_key_65_pad_0, pad_type = current_key_65_pad_type_0, strides = current_key_65_strides_0, weight = layers_16_self_attn_k_proj_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("current_key_65_cast_fp16")]; + string current_value_33_pad_type_0 = const()[name = string("current_value_33_pad_type_0"), val = string("valid")]; + tensor current_value_33_strides_0 = const()[name = string("current_value_33_strides_0"), val = tensor([1, 1])]; + tensor current_value_33_pad_0 = const()[name = string("current_value_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_33_dilations_0 = const()[name = string("current_value_33_dilations_0"), val = tensor([1, 1])]; + int32 current_value_33_groups_0 = const()[name = string("current_value_33_groups_0"), val = int32(1)]; + tensor layers_16_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255030400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256079040))))[name = string("layers_16_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_33_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_33_dilations_0, groups = current_value_33_groups_0, pad = current_value_33_pad_0, pad_type = current_value_33_pad_type_0, strides = current_value_33_strides_0, weight = layers_16_self_attn_v_proj_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("current_value_33_cast_fp16")]; + tensor var_6423 = const()[name = string("op_6423"), val = tensor([16, 128, 1, 1])]; + tensor inputs_129_cast_fp16 = reshape(shape = var_6423, x = query_97_cast_fp16)[name = string("inputs_129_cast_fp16")]; + tensor inputs_sq_131_cast_fp16 = mul(x = inputs_129_cast_fp16, y = inputs_129_cast_fp16)[name = string("inputs_sq_131_cast_fp16")]; + tensor variance_131_axes_0 = const()[name = string("variance_131_axes_0"), val = tensor([1])]; + bool variance_131_keep_dims_0 = const()[name = string("variance_131_keep_dims_0"), val = bool(true)]; + tensor variance_131_cast_fp16 = reduce_mean(axes = variance_131_axes_0, keep_dims = variance_131_keep_dims_0, x = inputs_sq_131_cast_fp16)[name = string("variance_131_cast_fp16")]; + fp16 var_6429_to_fp16 = const()[name = string("op_6429_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6430_cast_fp16 = add(x = variance_131_cast_fp16, y = var_6429_to_fp16)[name = string("op_6430_cast_fp16")]; + fp32 var_6431_epsilon_0 = const()[name = string("op_6431_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6431_cast_fp16 = rsqrt(epsilon = var_6431_epsilon_0, x = var_6430_cast_fp16)[name = string("op_6431_cast_fp16")]; + tensor hidden_states_163_cast_fp16 = mul(x = inputs_129_cast_fp16, y = var_6431_cast_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor w_131_to_fp16 = const()[name = string("w_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256079616)))]; + tensor query_normed_33_cast_fp16 = mul(x = w_131_to_fp16, y = hidden_states_163_cast_fp16)[name = string("query_normed_33_cast_fp16")]; + tensor var_6439 = const()[name = string("op_6439"), val = tensor([8, 128, 1, 1])]; + tensor inputs_131_cast_fp16 = reshape(shape = var_6439, x = current_key_65_cast_fp16)[name = string("inputs_131_cast_fp16")]; + tensor inputs_sq_133_cast_fp16 = mul(x = inputs_131_cast_fp16, y = inputs_131_cast_fp16)[name = string("inputs_sq_133_cast_fp16")]; + tensor variance_133_axes_0 = const()[name = string("variance_133_axes_0"), val = tensor([1])]; + bool variance_133_keep_dims_0 = const()[name = string("variance_133_keep_dims_0"), val = bool(true)]; + tensor variance_133_cast_fp16 = reduce_mean(axes = variance_133_axes_0, keep_dims = variance_133_keep_dims_0, x = inputs_sq_133_cast_fp16)[name = string("variance_133_cast_fp16")]; + fp16 var_6445_to_fp16 = const()[name = string("op_6445_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6446_cast_fp16 = add(x = variance_133_cast_fp16, y = var_6445_to_fp16)[name = string("op_6446_cast_fp16")]; + fp32 var_6447_epsilon_0 = const()[name = string("op_6447_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6447_cast_fp16 = rsqrt(epsilon = var_6447_epsilon_0, x = var_6446_cast_fp16)[name = string("op_6447_cast_fp16")]; + tensor hidden_states_165_cast_fp16 = mul(x = inputs_131_cast_fp16, y = var_6447_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + tensor w_133_to_fp16 = const()[name = string("w_133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256079936)))]; + tensor current_key_normed_33_cast_fp16 = mul(x = w_133_to_fp16, y = hidden_states_165_cast_fp16)[name = string("current_key_normed_33_cast_fp16")]; + tensor var_6465 = const()[name = string("op_6465"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_97_cast_fp16 = reshape(shape = var_6465, x = query_normed_33_cast_fp16)[name = string("mh_q_97_cast_fp16")]; + tensor var_6467 = const()[name = string("op_6467"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_65_cast_fp16 = reshape(shape = var_6467, x = current_key_normed_33_cast_fp16)[name = string("mh_k_65_cast_fp16")]; + tensor var_6471_cast_fp16 = mul(x = mh_q_97_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6471_cast_fp16")]; + tensor var_6476_begin_0 = const()[name = string("op_6476_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6476_end_0 = const()[name = string("op_6476_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_6476_end_mask_0 = const()[name = string("op_6476_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_6476_cast_fp16 = slice_by_index(begin = var_6476_begin_0, end = var_6476_end_0, end_mask = var_6476_end_mask_0, x = mh_q_97_cast_fp16)[name = string("op_6476_cast_fp16")]; + tensor var_6482_begin_0 = const()[name = string("op_6482_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6482_end_0 = const()[name = string("op_6482_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_6482_end_mask_0 = const()[name = string("op_6482_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6482_cast_fp16 = slice_by_index(begin = var_6482_begin_0, end = var_6482_end_0, end_mask = var_6482_end_mask_0, x = mh_q_97_cast_fp16)[name = string("op_6482_cast_fp16")]; + fp16 const_385_promoted_to_fp16 = const()[name = string("const_385_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6484_cast_fp16 = mul(x = var_6482_cast_fp16, y = const_385_promoted_to_fp16)[name = string("op_6484_cast_fp16")]; + bool var_6486_interleave_0 = const()[name = string("op_6486_interleave_0"), val = bool(false)]; + tensor var_6486_cast_fp16 = concat(axis = var_6364, interleave = var_6486_interleave_0, values = (var_6484_cast_fp16, var_6476_cast_fp16))[name = string("op_6486_cast_fp16")]; + tensor var_6487_cast_fp16 = mul(x = var_6486_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6487_cast_fp16")]; + tensor mh_q_99_cast_fp16 = add(x = var_6471_cast_fp16, y = var_6487_cast_fp16)[name = string("mh_q_99_cast_fp16")]; + tensor var_6489_cast_fp16 = mul(x = mh_k_65_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6489_cast_fp16")]; + tensor var_6494_begin_0 = const()[name = string("op_6494_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6494_end_0 = const()[name = string("op_6494_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_6494_end_mask_0 = const()[name = string("op_6494_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_6494_cast_fp16 = slice_by_index(begin = var_6494_begin_0, end = var_6494_end_0, end_mask = var_6494_end_mask_0, x = mh_k_65_cast_fp16)[name = string("op_6494_cast_fp16")]; + tensor var_6500_begin_0 = const()[name = string("op_6500_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6500_end_0 = const()[name = string("op_6500_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_6500_end_mask_0 = const()[name = string("op_6500_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6500_cast_fp16 = slice_by_index(begin = var_6500_begin_0, end = var_6500_end_0, end_mask = var_6500_end_mask_0, x = mh_k_65_cast_fp16)[name = string("op_6500_cast_fp16")]; + fp16 const_388_promoted_to_fp16 = const()[name = string("const_388_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6502_cast_fp16 = mul(x = var_6500_cast_fp16, y = const_388_promoted_to_fp16)[name = string("op_6502_cast_fp16")]; + bool var_6504_interleave_0 = const()[name = string("op_6504_interleave_0"), val = bool(false)]; + tensor var_6504_cast_fp16 = concat(axis = var_6364, interleave = var_6504_interleave_0, values = (var_6502_cast_fp16, var_6494_cast_fp16))[name = string("op_6504_cast_fp16")]; + tensor var_6505_cast_fp16 = mul(x = var_6504_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6505_cast_fp16")]; + tensor mh_k_67_cast_fp16 = add(x = var_6489_cast_fp16, y = var_6505_cast_fp16)[name = string("mh_k_67_cast_fp16")]; + tensor var_6509 = const()[name = string("op_6509"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_67_cast_fp16 = reshape(shape = var_6509, x = mh_k_67_cast_fp16)[name = string("current_key_67_cast_fp16")]; + tensor var_6516_cast_fp16 = mul(x = var_101_cast_fp16_16, y = var_323_cast_fp16)[name = string("op_6516_cast_fp16")]; + tensor var_6517_cast_fp16 = mul(x = current_key_67_cast_fp16, y = var_321_cast_fp16)[name = string("op_6517_cast_fp16")]; + tensor key_99_cast_fp16 = add(x = var_6516_cast_fp16, y = var_6517_cast_fp16)[name = string("key_99_cast_fp16")]; + tensor var_6520_cast_fp16 = mul(x = var_132_cast_fp16_16, y = var_323_cast_fp16)[name = string("op_6520_cast_fp16")]; + tensor var_6521_cast_fp16 = mul(x = current_value_33_cast_fp16, y = var_321_cast_fp16)[name = string("op_6521_cast_fp16")]; + tensor value_65_cast_fp16 = add(x = var_6520_cast_fp16, y = var_6521_cast_fp16)[name = string("value_65_cast_fp16")]; + tensor var_6525 = const()[name = string("op_6525"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_65_cast_fp16 = reshape(shape = var_6525, x = key_99_cast_fp16)[name = string("key_heads_65_cast_fp16")]; + tensor var_6527 = const()[name = string("op_6527"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_65_cast_fp16 = reshape(shape = var_6527, x = value_65_cast_fp16)[name = string("value_heads_65_cast_fp16")]; + tensor var_6530_begin_0 = const()[name = string("op_6530_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6530_end_0 = const()[name = string("op_6530_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6530_end_mask_0 = const()[name = string("op_6530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6530_cast_fp16 = slice_by_index(begin = var_6530_begin_0, end = var_6530_end_0, end_mask = var_6530_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6530_cast_fp16")]; + tensor var_6534_begin_0 = const()[name = string("op_6534_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6534_end_0 = const()[name = string("op_6534_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6534_end_mask_0 = const()[name = string("op_6534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6534_cast_fp16 = slice_by_index(begin = var_6534_begin_0, end = var_6534_end_0, end_mask = var_6534_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6534_cast_fp16")]; + tensor var_6546_begin_0 = const()[name = string("op_6546_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_6546_end_0 = const()[name = string("op_6546_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_6546_end_mask_0 = const()[name = string("op_6546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6546_cast_fp16 = slice_by_index(begin = var_6546_begin_0, end = var_6546_end_0, end_mask = var_6546_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6546_cast_fp16")]; + tensor var_6550_begin_0 = const()[name = string("op_6550_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_6550_end_0 = const()[name = string("op_6550_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_6550_end_mask_0 = const()[name = string("op_6550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6550_cast_fp16 = slice_by_index(begin = var_6550_begin_0, end = var_6550_end_0, end_mask = var_6550_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6550_cast_fp16")]; + tensor var_6562_begin_0 = const()[name = string("op_6562_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_6562_end_0 = const()[name = string("op_6562_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_6562_end_mask_0 = const()[name = string("op_6562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6562_cast_fp16 = slice_by_index(begin = var_6562_begin_0, end = var_6562_end_0, end_mask = var_6562_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6562_cast_fp16")]; + tensor var_6566_begin_0 = const()[name = string("op_6566_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_6566_end_0 = const()[name = string("op_6566_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_6566_end_mask_0 = const()[name = string("op_6566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = var_6566_end_0, end_mask = var_6566_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6566_cast_fp16")]; + tensor var_6578_begin_0 = const()[name = string("op_6578_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_6578_end_0 = const()[name = string("op_6578_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_6578_end_mask_0 = const()[name = string("op_6578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6578_cast_fp16 = slice_by_index(begin = var_6578_begin_0, end = var_6578_end_0, end_mask = var_6578_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6578_cast_fp16")]; + tensor var_6582_begin_0 = const()[name = string("op_6582_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_6582_end_0 = const()[name = string("op_6582_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_6582_end_mask_0 = const()[name = string("op_6582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6582_cast_fp16 = slice_by_index(begin = var_6582_begin_0, end = var_6582_end_0, end_mask = var_6582_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6582_cast_fp16")]; + tensor var_6594_begin_0 = const()[name = string("op_6594_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_6594_end_0 = const()[name = string("op_6594_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_6594_end_mask_0 = const()[name = string("op_6594_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6594_cast_fp16 = slice_by_index(begin = var_6594_begin_0, end = var_6594_end_0, end_mask = var_6594_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6594_cast_fp16")]; + tensor var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_6598_end_0 = const()[name = string("op_6598_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = var_6598_end_0, end_mask = var_6598_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6598_cast_fp16")]; + tensor var_6610_begin_0 = const()[name = string("op_6610_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_6610_end_0 = const()[name = string("op_6610_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_6610_end_mask_0 = const()[name = string("op_6610_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6610_cast_fp16 = slice_by_index(begin = var_6610_begin_0, end = var_6610_end_0, end_mask = var_6610_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6610_cast_fp16")]; + tensor var_6614_begin_0 = const()[name = string("op_6614_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_6614_end_0 = const()[name = string("op_6614_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_6614_end_mask_0 = const()[name = string("op_6614_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6614_cast_fp16 = slice_by_index(begin = var_6614_begin_0, end = var_6614_end_0, end_mask = var_6614_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6614_cast_fp16")]; + tensor var_6626_begin_0 = const()[name = string("op_6626_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_6626_end_0 = const()[name = string("op_6626_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_6626_end_mask_0 = const()[name = string("op_6626_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6626_cast_fp16 = slice_by_index(begin = var_6626_begin_0, end = var_6626_end_0, end_mask = var_6626_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6626_cast_fp16")]; + tensor var_6630_begin_0 = const()[name = string("op_6630_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_6630_end_0 = const()[name = string("op_6630_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_6630_end_mask_0 = const()[name = string("op_6630_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6630_cast_fp16 = slice_by_index(begin = var_6630_begin_0, end = var_6630_end_0, end_mask = var_6630_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6630_cast_fp16")]; + tensor var_6642_begin_0 = const()[name = string("op_6642_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_6642_end_0 = const()[name = string("op_6642_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6642_end_mask_0 = const()[name = string("op_6642_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6642_cast_fp16 = slice_by_index(begin = var_6642_begin_0, end = var_6642_end_0, end_mask = var_6642_end_mask_0, x = key_heads_65_cast_fp16)[name = string("op_6642_cast_fp16")]; + tensor var_6646_begin_0 = const()[name = string("op_6646_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_6646_end_0 = const()[name = string("op_6646_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6646_end_mask_0 = const()[name = string("op_6646_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6646_cast_fp16 = slice_by_index(begin = var_6646_begin_0, end = var_6646_end_0, end_mask = var_6646_end_mask_0, x = value_heads_65_cast_fp16)[name = string("op_6646_cast_fp16")]; + bool key_heads_67_interleave_0 = const()[name = string("key_heads_67_interleave_0"), val = bool(false)]; + tensor key_heads_67_cast_fp16 = concat(axis = var_6372, interleave = key_heads_67_interleave_0, values = (var_6530_cast_fp16, var_6530_cast_fp16, var_6546_cast_fp16, var_6546_cast_fp16, var_6562_cast_fp16, var_6562_cast_fp16, var_6578_cast_fp16, var_6578_cast_fp16, var_6594_cast_fp16, var_6594_cast_fp16, var_6610_cast_fp16, var_6610_cast_fp16, var_6626_cast_fp16, var_6626_cast_fp16, var_6642_cast_fp16, var_6642_cast_fp16))[name = string("key_heads_67_cast_fp16")]; + bool value_heads_67_interleave_0 = const()[name = string("value_heads_67_interleave_0"), val = bool(false)]; + tensor value_heads_67_cast_fp16 = concat(axis = var_6372, interleave = value_heads_67_interleave_0, values = (var_6534_cast_fp16, var_6534_cast_fp16, var_6550_cast_fp16, var_6550_cast_fp16, var_6566_cast_fp16, var_6566_cast_fp16, var_6582_cast_fp16, var_6582_cast_fp16, var_6598_cast_fp16, var_6598_cast_fp16, var_6614_cast_fp16, var_6614_cast_fp16, var_6630_cast_fp16, var_6630_cast_fp16, var_6646_cast_fp16, var_6646_cast_fp16))[name = string("value_heads_67_cast_fp16")]; + fp16 var_6669_to_fp16 = const()[name = string("op_6669_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_6670_cast_fp16 = mul(x = mh_q_99_cast_fp16, y = var_6669_to_fp16)[name = string("op_6670_cast_fp16")]; + bool mh_w_65_transpose_x_0 = const()[name = string("mh_w_65_transpose_x_0"), val = bool(true)]; + bool mh_w_65_transpose_y_0 = const()[name = string("mh_w_65_transpose_y_0"), val = bool(false)]; + tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_6670_cast_fp16, y = key_heads_67_cast_fp16)[name = string("mh_w_65_cast_fp16")]; + tensor mh_w_67_cast_fp16 = add(x = mh_w_65_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_67_cast_fp16")]; + tensor var_6682_cast_fp16 = softmax(axis = var_6354, x = mh_w_67_cast_fp16)[name = string("op_6682_cast_fp16")]; + bool attn_33_transpose_x_0 = const()[name = string("attn_33_transpose_x_0"), val = bool(false)]; + bool attn_33_transpose_y_0 = const()[name = string("attn_33_transpose_y_0"), val = bool(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = value_heads_67_cast_fp16, y = var_6682_cast_fp16)[name = string("attn_33_cast_fp16")]; + tensor var_6687 = const()[name = string("op_6687"), val = tensor([1, -1, 1, 1])]; + tensor input_129_cast_fp16 = reshape(shape = var_6687, x = attn_33_cast_fp16)[name = string("input_129_cast_fp16")]; + string obj_139_pad_type_0 = const()[name = string("obj_139_pad_type_0"), val = string("valid")]; + tensor obj_139_strides_0 = const()[name = string("obj_139_strides_0"), val = tensor([1, 1])]; + tensor obj_139_pad_0 = const()[name = string("obj_139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_139_dilations_0 = const()[name = string("obj_139_dilations_0"), val = tensor([1, 1])]; + int32 obj_139_groups_0 = const()[name = string("obj_139_groups_0"), val = int32(1)]; + tensor layers_16_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256080256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258177472))))[name = string("layers_16_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_139_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_139_dilations_0, groups = obj_139_groups_0, pad = obj_139_pad_0, pad_type = obj_139_pad_type_0, strides = obj_139_strides_0, weight = layers_16_self_attn_o_proj_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("obj_139_cast_fp16")]; + tensor inputs_133_cast_fp16 = add(x = inputs_127_cast_fp16, y = obj_139_cast_fp16)[name = string("inputs_133_cast_fp16")]; + tensor inputs_sq_135_cast_fp16 = mul(x = inputs_133_cast_fp16, y = inputs_133_cast_fp16)[name = string("inputs_sq_135_cast_fp16")]; + tensor variance_135_axes_0 = const()[name = string("variance_135_axes_0"), val = tensor([1])]; + bool variance_135_keep_dims_0 = const()[name = string("variance_135_keep_dims_0"), val = bool(true)]; + tensor variance_135_cast_fp16 = reduce_mean(axes = variance_135_axes_0, keep_dims = variance_135_keep_dims_0, x = inputs_sq_135_cast_fp16)[name = string("variance_135_cast_fp16")]; + fp16 var_6705_to_fp16 = const()[name = string("op_6705_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6706_cast_fp16 = add(x = variance_135_cast_fp16, y = var_6705_to_fp16)[name = string("op_6706_cast_fp16")]; + fp32 var_6707_epsilon_0 = const()[name = string("op_6707_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6707_cast_fp16 = rsqrt(epsilon = var_6707_epsilon_0, x = var_6706_cast_fp16)[name = string("op_6707_cast_fp16")]; + tensor hidden_states_167_cast_fp16 = mul(x = inputs_133_cast_fp16, y = var_6707_cast_fp16)[name = string("hidden_states_167_cast_fp16")]; + tensor w_135_to_fp16 = const()[name = string("w_135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258178048)))]; + tensor input_131_cast_fp16 = mul(x = w_135_to_fp16, y = hidden_states_167_cast_fp16)[name = string("input_131_cast_fp16")]; + string input_133_pad_type_0 = const()[name = string("input_133_pad_type_0"), val = string("valid")]; + tensor input_133_strides_0 = const()[name = string("input_133_strides_0"), val = tensor([1, 1])]; + tensor input_133_pad_0 = const()[name = string("input_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_133_dilations_0 = const()[name = string("input_133_dilations_0"), val = tensor([1, 1])]; + int32 input_133_groups_0 = const()[name = string("input_133_groups_0"), val = int32(1)]; + tensor layers_16_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258180160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261325952))))[name = string("layers_16_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_133_cast_fp16 = conv(dilations = input_133_dilations_0, groups = input_133_groups_0, pad = input_133_pad_0, pad_type = input_133_pad_type_0, strides = input_133_strides_0, weight = layers_16_mlp_gate_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")]; + tensor var_6721_cast_fp16 = silu(x = input_133_cast_fp16)[name = string("op_6721_cast_fp16")]; + string var_6727_pad_type_0 = const()[name = string("op_6727_pad_type_0"), val = string("valid")]; + tensor var_6727_strides_0 = const()[name = string("op_6727_strides_0"), val = tensor([1, 1])]; + tensor var_6727_pad_0 = const()[name = string("op_6727_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6727_dilations_0 = const()[name = string("op_6727_dilations_0"), val = tensor([1, 1])]; + int32 var_6727_groups_0 = const()[name = string("op_6727_groups_0"), val = int32(1)]; + tensor layers_16_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261326528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264472320))))[name = string("layers_16_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_6727_cast_fp16 = conv(dilations = var_6727_dilations_0, groups = var_6727_groups_0, pad = var_6727_pad_0, pad_type = var_6727_pad_type_0, strides = var_6727_strides_0, weight = layers_16_mlp_up_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("op_6727_cast_fp16")]; + tensor input_135_cast_fp16 = mul(x = var_6721_cast_fp16, y = var_6727_cast_fp16)[name = string("input_135_cast_fp16")]; + string hidden_states_169_pad_type_0 = const()[name = string("hidden_states_169_pad_type_0"), val = string("valid")]; + tensor hidden_states_169_strides_0 = const()[name = string("hidden_states_169_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_169_pad_0 = const()[name = string("hidden_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_169_dilations_0 = const()[name = string("hidden_states_169_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_169_groups_0 = const()[name = string("hidden_states_169_groups_0"), val = int32(1)]; + tensor layers_16_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264472896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267618688))))[name = string("layers_16_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_169_cast_fp16 = conv(dilations = hidden_states_169_dilations_0, groups = hidden_states_169_groups_0, pad = hidden_states_169_pad_0, pad_type = hidden_states_169_pad_type_0, strides = hidden_states_169_strides_0, weight = layers_16_mlp_down_proj_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; + tensor inputs_135_cast_fp16 = add(x = inputs_133_cast_fp16, y = hidden_states_169_cast_fp16)[name = string("inputs_135_cast_fp16")]; + int32 var_6741 = const()[name = string("op_6741"), val = int32(3)]; + int32 var_6751 = const()[name = string("op_6751"), val = int32(-2)]; + int32 var_6759 = const()[name = string("op_6759"), val = int32(1)]; + tensor inputs_sq_137_cast_fp16 = mul(x = inputs_135_cast_fp16, y = inputs_135_cast_fp16)[name = string("inputs_sq_137_cast_fp16")]; + tensor variance_137_axes_0 = const()[name = string("variance_137_axes_0"), val = tensor([1])]; + bool variance_137_keep_dims_0 = const()[name = string("variance_137_keep_dims_0"), val = bool(true)]; + tensor variance_137_cast_fp16 = reduce_mean(axes = variance_137_axes_0, keep_dims = variance_137_keep_dims_0, x = inputs_sq_137_cast_fp16)[name = string("variance_137_cast_fp16")]; + fp16 var_6771_to_fp16 = const()[name = string("op_6771_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6772_cast_fp16 = add(x = variance_137_cast_fp16, y = var_6771_to_fp16)[name = string("op_6772_cast_fp16")]; + fp32 var_6773_epsilon_0 = const()[name = string("op_6773_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6773_cast_fp16 = rsqrt(epsilon = var_6773_epsilon_0, x = var_6772_cast_fp16)[name = string("op_6773_cast_fp16")]; + tensor hidden_states_171_cast_fp16 = mul(x = inputs_135_cast_fp16, y = var_6773_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; + tensor w_137_to_fp16 = const()[name = string("w_137_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267619264)))]; + tensor obj_141_cast_fp16 = mul(x = w_137_to_fp16, y = hidden_states_171_cast_fp16)[name = string("obj_141_cast_fp16")]; + string query_103_pad_type_0 = const()[name = string("query_103_pad_type_0"), val = string("valid")]; + tensor query_103_strides_0 = const()[name = string("query_103_strides_0"), val = tensor([1, 1])]; + tensor query_103_pad_0 = const()[name = string("query_103_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_103_dilations_0 = const()[name = string("query_103_dilations_0"), val = tensor([1, 1])]; + int32 query_103_groups_0 = const()[name = string("query_103_groups_0"), val = int32(1)]; + tensor layers_17_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267621376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269718592))))[name = string("layers_17_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_103_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_103_dilations_0, groups = query_103_groups_0, pad = query_103_pad_0, pad_type = query_103_pad_type_0, strides = query_103_strides_0, weight = layers_17_self_attn_q_proj_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("query_103_cast_fp16")]; + string current_key_69_pad_type_0 = const()[name = string("current_key_69_pad_type_0"), val = string("valid")]; + tensor current_key_69_strides_0 = const()[name = string("current_key_69_strides_0"), val = tensor([1, 1])]; + tensor current_key_69_pad_0 = const()[name = string("current_key_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_69_dilations_0 = const()[name = string("current_key_69_dilations_0"), val = tensor([1, 1])]; + int32 current_key_69_groups_0 = const()[name = string("current_key_69_groups_0"), val = int32(1)]; + tensor layers_17_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269719168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270767808))))[name = string("layers_17_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_69_cast_fp16 = conv(dilations = current_key_69_dilations_0, groups = current_key_69_groups_0, pad = current_key_69_pad_0, pad_type = current_key_69_pad_type_0, strides = current_key_69_strides_0, weight = layers_17_self_attn_k_proj_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("current_key_69_cast_fp16")]; + string current_value_35_pad_type_0 = const()[name = string("current_value_35_pad_type_0"), val = string("valid")]; + tensor current_value_35_strides_0 = const()[name = string("current_value_35_strides_0"), val = tensor([1, 1])]; + tensor current_value_35_pad_0 = const()[name = string("current_value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_35_dilations_0 = const()[name = string("current_value_35_dilations_0"), val = tensor([1, 1])]; + int32 current_value_35_groups_0 = const()[name = string("current_value_35_groups_0"), val = int32(1)]; + tensor layers_17_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270768384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271817024))))[name = string("layers_17_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_35_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_35_dilations_0, groups = current_value_35_groups_0, pad = current_value_35_pad_0, pad_type = current_value_35_pad_type_0, strides = current_value_35_strides_0, weight = layers_17_self_attn_v_proj_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("current_value_35_cast_fp16")]; + tensor var_6810 = const()[name = string("op_6810"), val = tensor([16, 128, 1, 1])]; + tensor inputs_137_cast_fp16 = reshape(shape = var_6810, x = query_103_cast_fp16)[name = string("inputs_137_cast_fp16")]; + tensor inputs_sq_139_cast_fp16 = mul(x = inputs_137_cast_fp16, y = inputs_137_cast_fp16)[name = string("inputs_sq_139_cast_fp16")]; + tensor variance_139_axes_0 = const()[name = string("variance_139_axes_0"), val = tensor([1])]; + bool variance_139_keep_dims_0 = const()[name = string("variance_139_keep_dims_0"), val = bool(true)]; + tensor variance_139_cast_fp16 = reduce_mean(axes = variance_139_axes_0, keep_dims = variance_139_keep_dims_0, x = inputs_sq_139_cast_fp16)[name = string("variance_139_cast_fp16")]; + fp16 var_6816_to_fp16 = const()[name = string("op_6816_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6817_cast_fp16 = add(x = variance_139_cast_fp16, y = var_6816_to_fp16)[name = string("op_6817_cast_fp16")]; + fp32 var_6818_epsilon_0 = const()[name = string("op_6818_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6818_cast_fp16 = rsqrt(epsilon = var_6818_epsilon_0, x = var_6817_cast_fp16)[name = string("op_6818_cast_fp16")]; + tensor hidden_states_173_cast_fp16 = mul(x = inputs_137_cast_fp16, y = var_6818_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; + tensor w_139_to_fp16 = const()[name = string("w_139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271817600)))]; + tensor query_normed_35_cast_fp16 = mul(x = w_139_to_fp16, y = hidden_states_173_cast_fp16)[name = string("query_normed_35_cast_fp16")]; + tensor var_6826 = const()[name = string("op_6826"), val = tensor([8, 128, 1, 1])]; + tensor inputs_139_cast_fp16 = reshape(shape = var_6826, x = current_key_69_cast_fp16)[name = string("inputs_139_cast_fp16")]; + tensor inputs_sq_141_cast_fp16 = mul(x = inputs_139_cast_fp16, y = inputs_139_cast_fp16)[name = string("inputs_sq_141_cast_fp16")]; + tensor variance_141_axes_0 = const()[name = string("variance_141_axes_0"), val = tensor([1])]; + bool variance_141_keep_dims_0 = const()[name = string("variance_141_keep_dims_0"), val = bool(true)]; + tensor variance_141_cast_fp16 = reduce_mean(axes = variance_141_axes_0, keep_dims = variance_141_keep_dims_0, x = inputs_sq_141_cast_fp16)[name = string("variance_141_cast_fp16")]; + fp16 var_6832_to_fp16 = const()[name = string("op_6832_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_6833_cast_fp16 = add(x = variance_141_cast_fp16, y = var_6832_to_fp16)[name = string("op_6833_cast_fp16")]; + fp32 var_6834_epsilon_0 = const()[name = string("op_6834_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6834_cast_fp16 = rsqrt(epsilon = var_6834_epsilon_0, x = var_6833_cast_fp16)[name = string("op_6834_cast_fp16")]; + tensor hidden_states_175_cast_fp16 = mul(x = inputs_139_cast_fp16, y = var_6834_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; + tensor w_141_to_fp16 = const()[name = string("w_141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271817920)))]; + tensor current_key_normed_35_cast_fp16 = mul(x = w_141_to_fp16, y = hidden_states_175_cast_fp16)[name = string("current_key_normed_35_cast_fp16")]; + tensor var_6852 = const()[name = string("op_6852"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_103_cast_fp16 = reshape(shape = var_6852, x = query_normed_35_cast_fp16)[name = string("mh_q_103_cast_fp16")]; + tensor var_6854 = const()[name = string("op_6854"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_69_cast_fp16 = reshape(shape = var_6854, x = current_key_normed_35_cast_fp16)[name = string("mh_k_69_cast_fp16")]; + tensor var_6858_cast_fp16 = mul(x = mh_q_103_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6858_cast_fp16")]; + tensor var_6863_begin_0 = const()[name = string("op_6863_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6863_end_0 = const()[name = string("op_6863_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_6863_end_mask_0 = const()[name = string("op_6863_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_6863_cast_fp16 = slice_by_index(begin = var_6863_begin_0, end = var_6863_end_0, end_mask = var_6863_end_mask_0, x = mh_q_103_cast_fp16)[name = string("op_6863_cast_fp16")]; + tensor var_6869_begin_0 = const()[name = string("op_6869_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6869_end_0 = const()[name = string("op_6869_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_6869_end_mask_0 = const()[name = string("op_6869_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6869_cast_fp16 = slice_by_index(begin = var_6869_begin_0, end = var_6869_end_0, end_mask = var_6869_end_mask_0, x = mh_q_103_cast_fp16)[name = string("op_6869_cast_fp16")]; + fp16 const_408_promoted_to_fp16 = const()[name = string("const_408_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6871_cast_fp16 = mul(x = var_6869_cast_fp16, y = const_408_promoted_to_fp16)[name = string("op_6871_cast_fp16")]; + bool var_6873_interleave_0 = const()[name = string("op_6873_interleave_0"), val = bool(false)]; + tensor var_6873_cast_fp16 = concat(axis = var_6751, interleave = var_6873_interleave_0, values = (var_6871_cast_fp16, var_6863_cast_fp16))[name = string("op_6873_cast_fp16")]; + tensor var_6874_cast_fp16 = mul(x = var_6873_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6874_cast_fp16")]; + tensor mh_q_105_cast_fp16 = add(x = var_6858_cast_fp16, y = var_6874_cast_fp16)[name = string("mh_q_105_cast_fp16")]; + tensor var_6876_cast_fp16 = mul(x = mh_k_69_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6876_cast_fp16")]; + tensor var_6881_begin_0 = const()[name = string("op_6881_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6881_end_0 = const()[name = string("op_6881_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_6881_end_mask_0 = const()[name = string("op_6881_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_6881_cast_fp16 = slice_by_index(begin = var_6881_begin_0, end = var_6881_end_0, end_mask = var_6881_end_mask_0, x = mh_k_69_cast_fp16)[name = string("op_6881_cast_fp16")]; + tensor var_6887_begin_0 = const()[name = string("op_6887_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_6887_end_0 = const()[name = string("op_6887_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_6887_end_mask_0 = const()[name = string("op_6887_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6887_cast_fp16 = slice_by_index(begin = var_6887_begin_0, end = var_6887_end_0, end_mask = var_6887_end_mask_0, x = mh_k_69_cast_fp16)[name = string("op_6887_cast_fp16")]; + fp16 const_411_promoted_to_fp16 = const()[name = string("const_411_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6889_cast_fp16 = mul(x = var_6887_cast_fp16, y = const_411_promoted_to_fp16)[name = string("op_6889_cast_fp16")]; + bool var_6891_interleave_0 = const()[name = string("op_6891_interleave_0"), val = bool(false)]; + tensor var_6891_cast_fp16 = concat(axis = var_6751, interleave = var_6891_interleave_0, values = (var_6889_cast_fp16, var_6881_cast_fp16))[name = string("op_6891_cast_fp16")]; + tensor var_6892_cast_fp16 = mul(x = var_6891_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6892_cast_fp16")]; + tensor mh_k_71_cast_fp16 = add(x = var_6876_cast_fp16, y = var_6892_cast_fp16)[name = string("mh_k_71_cast_fp16")]; + tensor var_6896 = const()[name = string("op_6896"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_71_cast_fp16 = reshape(shape = var_6896, x = mh_k_71_cast_fp16)[name = string("current_key_71_cast_fp16")]; + tensor var_6903_cast_fp16 = mul(x = var_101_cast_fp16_17, y = var_323_cast_fp16)[name = string("op_6903_cast_fp16")]; + tensor var_6904_cast_fp16 = mul(x = current_key_71_cast_fp16, y = var_321_cast_fp16)[name = string("op_6904_cast_fp16")]; + tensor key_105_cast_fp16 = add(x = var_6903_cast_fp16, y = var_6904_cast_fp16)[name = string("key_105_cast_fp16")]; + tensor var_6907_cast_fp16 = mul(x = var_132_cast_fp16_17, y = var_323_cast_fp16)[name = string("op_6907_cast_fp16")]; + tensor var_6908_cast_fp16 = mul(x = current_value_35_cast_fp16, y = var_321_cast_fp16)[name = string("op_6908_cast_fp16")]; + tensor value_69_cast_fp16 = add(x = var_6907_cast_fp16, y = var_6908_cast_fp16)[name = string("value_69_cast_fp16")]; + tensor var_6912 = const()[name = string("op_6912"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_69_cast_fp16 = reshape(shape = var_6912, x = key_105_cast_fp16)[name = string("key_heads_69_cast_fp16")]; + tensor var_6914 = const()[name = string("op_6914"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_69_cast_fp16 = reshape(shape = var_6914, x = value_69_cast_fp16)[name = string("value_heads_69_cast_fp16")]; + tensor var_6917_begin_0 = const()[name = string("op_6917_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6917_end_0 = const()[name = string("op_6917_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6917_end_mask_0 = const()[name = string("op_6917_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6917_cast_fp16 = slice_by_index(begin = var_6917_begin_0, end = var_6917_end_0, end_mask = var_6917_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_6917_cast_fp16")]; + tensor var_6921_begin_0 = const()[name = string("op_6921_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6921_end_0 = const()[name = string("op_6921_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_6921_end_mask_0 = const()[name = string("op_6921_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6921_cast_fp16 = slice_by_index(begin = var_6921_begin_0, end = var_6921_end_0, end_mask = var_6921_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_6921_cast_fp16")]; + tensor var_6933_begin_0 = const()[name = string("op_6933_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_6933_end_0 = const()[name = string("op_6933_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_6933_end_mask_0 = const()[name = string("op_6933_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6933_cast_fp16 = slice_by_index(begin = var_6933_begin_0, end = var_6933_end_0, end_mask = var_6933_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_6933_cast_fp16")]; + tensor var_6937_begin_0 = const()[name = string("op_6937_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_6937_end_0 = const()[name = string("op_6937_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_6937_end_mask_0 = const()[name = string("op_6937_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6937_cast_fp16 = slice_by_index(begin = var_6937_begin_0, end = var_6937_end_0, end_mask = var_6937_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_6937_cast_fp16")]; + tensor var_6949_begin_0 = const()[name = string("op_6949_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_6949_end_0 = const()[name = string("op_6949_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_6949_end_mask_0 = const()[name = string("op_6949_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6949_cast_fp16 = slice_by_index(begin = var_6949_begin_0, end = var_6949_end_0, end_mask = var_6949_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_6949_cast_fp16")]; + tensor var_6953_begin_0 = const()[name = string("op_6953_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_6953_end_0 = const()[name = string("op_6953_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_6953_end_mask_0 = const()[name = string("op_6953_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6953_cast_fp16 = slice_by_index(begin = var_6953_begin_0, end = var_6953_end_0, end_mask = var_6953_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_6953_cast_fp16")]; + tensor var_6965_begin_0 = const()[name = string("op_6965_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_6965_end_0 = const()[name = string("op_6965_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_6965_end_mask_0 = const()[name = string("op_6965_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6965_cast_fp16 = slice_by_index(begin = var_6965_begin_0, end = var_6965_end_0, end_mask = var_6965_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_6965_cast_fp16")]; + tensor var_6969_begin_0 = const()[name = string("op_6969_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_6969_end_0 = const()[name = string("op_6969_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_6969_end_mask_0 = const()[name = string("op_6969_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6969_cast_fp16 = slice_by_index(begin = var_6969_begin_0, end = var_6969_end_0, end_mask = var_6969_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_6969_cast_fp16")]; + tensor var_6981_begin_0 = const()[name = string("op_6981_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_6981_end_0 = const()[name = string("op_6981_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_6981_end_mask_0 = const()[name = string("op_6981_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6981_cast_fp16 = slice_by_index(begin = var_6981_begin_0, end = var_6981_end_0, end_mask = var_6981_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_6981_cast_fp16")]; + tensor var_6985_begin_0 = const()[name = string("op_6985_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_6985_end_0 = const()[name = string("op_6985_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_6985_end_mask_0 = const()[name = string("op_6985_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6985_cast_fp16 = slice_by_index(begin = var_6985_begin_0, end = var_6985_end_0, end_mask = var_6985_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_6985_cast_fp16")]; + tensor var_6997_begin_0 = const()[name = string("op_6997_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_6997_end_0 = const()[name = string("op_6997_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_6997_end_mask_0 = const()[name = string("op_6997_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6997_cast_fp16 = slice_by_index(begin = var_6997_begin_0, end = var_6997_end_0, end_mask = var_6997_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_6997_cast_fp16")]; + tensor var_7001_begin_0 = const()[name = string("op_7001_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_7001_end_0 = const()[name = string("op_7001_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_7001_end_mask_0 = const()[name = string("op_7001_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7001_cast_fp16 = slice_by_index(begin = var_7001_begin_0, end = var_7001_end_0, end_mask = var_7001_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_7001_cast_fp16")]; + tensor var_7013_begin_0 = const()[name = string("op_7013_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_7013_end_0 = const()[name = string("op_7013_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_7013_end_mask_0 = const()[name = string("op_7013_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7013_cast_fp16 = slice_by_index(begin = var_7013_begin_0, end = var_7013_end_0, end_mask = var_7013_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_7013_cast_fp16")]; + tensor var_7017_begin_0 = const()[name = string("op_7017_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_7017_end_0 = const()[name = string("op_7017_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_7017_end_mask_0 = const()[name = string("op_7017_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7017_cast_fp16 = slice_by_index(begin = var_7017_begin_0, end = var_7017_end_0, end_mask = var_7017_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_7017_cast_fp16")]; + tensor var_7029_begin_0 = const()[name = string("op_7029_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_7029_end_0 = const()[name = string("op_7029_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7029_end_mask_0 = const()[name = string("op_7029_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7029_cast_fp16 = slice_by_index(begin = var_7029_begin_0, end = var_7029_end_0, end_mask = var_7029_end_mask_0, x = key_heads_69_cast_fp16)[name = string("op_7029_cast_fp16")]; + tensor var_7033_begin_0 = const()[name = string("op_7033_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_7033_end_0 = const()[name = string("op_7033_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7033_end_mask_0 = const()[name = string("op_7033_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7033_cast_fp16 = slice_by_index(begin = var_7033_begin_0, end = var_7033_end_0, end_mask = var_7033_end_mask_0, x = value_heads_69_cast_fp16)[name = string("op_7033_cast_fp16")]; + bool key_heads_71_interleave_0 = const()[name = string("key_heads_71_interleave_0"), val = bool(false)]; + tensor key_heads_71_cast_fp16 = concat(axis = var_6759, interleave = key_heads_71_interleave_0, values = (var_6917_cast_fp16, var_6917_cast_fp16, var_6933_cast_fp16, var_6933_cast_fp16, var_6949_cast_fp16, var_6949_cast_fp16, var_6965_cast_fp16, var_6965_cast_fp16, var_6981_cast_fp16, var_6981_cast_fp16, var_6997_cast_fp16, var_6997_cast_fp16, var_7013_cast_fp16, var_7013_cast_fp16, var_7029_cast_fp16, var_7029_cast_fp16))[name = string("key_heads_71_cast_fp16")]; + bool value_heads_71_interleave_0 = const()[name = string("value_heads_71_interleave_0"), val = bool(false)]; + tensor value_heads_71_cast_fp16 = concat(axis = var_6759, interleave = value_heads_71_interleave_0, values = (var_6921_cast_fp16, var_6921_cast_fp16, var_6937_cast_fp16, var_6937_cast_fp16, var_6953_cast_fp16, var_6953_cast_fp16, var_6969_cast_fp16, var_6969_cast_fp16, var_6985_cast_fp16, var_6985_cast_fp16, var_7001_cast_fp16, var_7001_cast_fp16, var_7017_cast_fp16, var_7017_cast_fp16, var_7033_cast_fp16, var_7033_cast_fp16))[name = string("value_heads_71_cast_fp16")]; + fp16 var_7056_to_fp16 = const()[name = string("op_7056_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_7057_cast_fp16 = mul(x = mh_q_105_cast_fp16, y = var_7056_to_fp16)[name = string("op_7057_cast_fp16")]; + bool mh_w_69_transpose_x_0 = const()[name = string("mh_w_69_transpose_x_0"), val = bool(true)]; + bool mh_w_69_transpose_y_0 = const()[name = string("mh_w_69_transpose_y_0"), val = bool(false)]; + tensor mh_w_69_cast_fp16 = matmul(transpose_x = mh_w_69_transpose_x_0, transpose_y = mh_w_69_transpose_y_0, x = var_7057_cast_fp16, y = key_heads_71_cast_fp16)[name = string("mh_w_69_cast_fp16")]; + tensor mh_w_71_cast_fp16 = add(x = mh_w_69_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_71_cast_fp16")]; + tensor var_7069_cast_fp16 = softmax(axis = var_6741, x = mh_w_71_cast_fp16)[name = string("op_7069_cast_fp16")]; + bool attn_35_transpose_x_0 = const()[name = string("attn_35_transpose_x_0"), val = bool(false)]; + bool attn_35_transpose_y_0 = const()[name = string("attn_35_transpose_y_0"), val = bool(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = value_heads_71_cast_fp16, y = var_7069_cast_fp16)[name = string("attn_35_cast_fp16")]; + tensor var_7074 = const()[name = string("op_7074"), val = tensor([1, -1, 1, 1])]; + tensor input_137_cast_fp16 = reshape(shape = var_7074, x = attn_35_cast_fp16)[name = string("input_137_cast_fp16")]; + string obj_147_pad_type_0 = const()[name = string("obj_147_pad_type_0"), val = string("valid")]; + tensor obj_147_strides_0 = const()[name = string("obj_147_strides_0"), val = tensor([1, 1])]; + tensor obj_147_pad_0 = const()[name = string("obj_147_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_147_dilations_0 = const()[name = string("obj_147_dilations_0"), val = tensor([1, 1])]; + int32 obj_147_groups_0 = const()[name = string("obj_147_groups_0"), val = int32(1)]; + tensor layers_17_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271818240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273915456))))[name = string("layers_17_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_147_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_147_dilations_0, groups = obj_147_groups_0, pad = obj_147_pad_0, pad_type = obj_147_pad_type_0, strides = obj_147_strides_0, weight = layers_17_self_attn_o_proj_weight_to_fp16_palettized, x = input_137_cast_fp16)[name = string("obj_147_cast_fp16")]; + tensor inputs_141_cast_fp16 = add(x = inputs_135_cast_fp16, y = obj_147_cast_fp16)[name = string("inputs_141_cast_fp16")]; + tensor inputs_sq_143_cast_fp16 = mul(x = inputs_141_cast_fp16, y = inputs_141_cast_fp16)[name = string("inputs_sq_143_cast_fp16")]; + tensor variance_143_axes_0 = const()[name = string("variance_143_axes_0"), val = tensor([1])]; + bool variance_143_keep_dims_0 = const()[name = string("variance_143_keep_dims_0"), val = bool(true)]; + tensor variance_143_cast_fp16 = reduce_mean(axes = variance_143_axes_0, keep_dims = variance_143_keep_dims_0, x = inputs_sq_143_cast_fp16)[name = string("variance_143_cast_fp16")]; + fp16 var_7092_to_fp16 = const()[name = string("op_7092_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7093_cast_fp16 = add(x = variance_143_cast_fp16, y = var_7092_to_fp16)[name = string("op_7093_cast_fp16")]; + fp32 var_7094_epsilon_0 = const()[name = string("op_7094_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7094_cast_fp16 = rsqrt(epsilon = var_7094_epsilon_0, x = var_7093_cast_fp16)[name = string("op_7094_cast_fp16")]; + tensor hidden_states_177_cast_fp16 = mul(x = inputs_141_cast_fp16, y = var_7094_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; + tensor w_143_to_fp16 = const()[name = string("w_143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273916032)))]; + tensor input_139_cast_fp16 = mul(x = w_143_to_fp16, y = hidden_states_177_cast_fp16)[name = string("input_139_cast_fp16")]; + string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; + tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; + tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; + int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; + tensor layers_17_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273918144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277063936))))[name = string("layers_17_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_141_cast_fp16 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = layers_17_mlp_gate_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("input_141_cast_fp16")]; + tensor var_7108_cast_fp16 = silu(x = input_141_cast_fp16)[name = string("op_7108_cast_fp16")]; + string var_7114_pad_type_0 = const()[name = string("op_7114_pad_type_0"), val = string("valid")]; + tensor var_7114_strides_0 = const()[name = string("op_7114_strides_0"), val = tensor([1, 1])]; + tensor var_7114_pad_0 = const()[name = string("op_7114_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7114_dilations_0 = const()[name = string("op_7114_dilations_0"), val = tensor([1, 1])]; + int32 var_7114_groups_0 = const()[name = string("op_7114_groups_0"), val = int32(1)]; + tensor layers_17_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277064512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280210304))))[name = string("layers_17_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_7114_cast_fp16 = conv(dilations = var_7114_dilations_0, groups = var_7114_groups_0, pad = var_7114_pad_0, pad_type = var_7114_pad_type_0, strides = var_7114_strides_0, weight = layers_17_mlp_up_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("op_7114_cast_fp16")]; + tensor input_143_cast_fp16 = mul(x = var_7108_cast_fp16, y = var_7114_cast_fp16)[name = string("input_143_cast_fp16")]; + string hidden_states_179_pad_type_0 = const()[name = string("hidden_states_179_pad_type_0"), val = string("valid")]; + tensor hidden_states_179_strides_0 = const()[name = string("hidden_states_179_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_179_pad_0 = const()[name = string("hidden_states_179_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_179_dilations_0 = const()[name = string("hidden_states_179_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_179_groups_0 = const()[name = string("hidden_states_179_groups_0"), val = int32(1)]; + tensor layers_17_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280210880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283356672))))[name = string("layers_17_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_179_cast_fp16 = conv(dilations = hidden_states_179_dilations_0, groups = hidden_states_179_groups_0, pad = hidden_states_179_pad_0, pad_type = hidden_states_179_pad_type_0, strides = hidden_states_179_strides_0, weight = layers_17_mlp_down_proj_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + tensor inputs_143_cast_fp16 = add(x = inputs_141_cast_fp16, y = hidden_states_179_cast_fp16)[name = string("inputs_143_cast_fp16")]; + int32 var_7128 = const()[name = string("op_7128"), val = int32(3)]; + int32 var_7138 = const()[name = string("op_7138"), val = int32(-2)]; + int32 var_7146 = const()[name = string("op_7146"), val = int32(1)]; + tensor inputs_sq_145_cast_fp16 = mul(x = inputs_143_cast_fp16, y = inputs_143_cast_fp16)[name = string("inputs_sq_145_cast_fp16")]; + tensor variance_145_axes_0 = const()[name = string("variance_145_axes_0"), val = tensor([1])]; + bool variance_145_keep_dims_0 = const()[name = string("variance_145_keep_dims_0"), val = bool(true)]; + tensor variance_145_cast_fp16 = reduce_mean(axes = variance_145_axes_0, keep_dims = variance_145_keep_dims_0, x = inputs_sq_145_cast_fp16)[name = string("variance_145_cast_fp16")]; + fp16 var_7158_to_fp16 = const()[name = string("op_7158_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7159_cast_fp16 = add(x = variance_145_cast_fp16, y = var_7158_to_fp16)[name = string("op_7159_cast_fp16")]; + fp32 var_7160_epsilon_0 = const()[name = string("op_7160_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7160_cast_fp16 = rsqrt(epsilon = var_7160_epsilon_0, x = var_7159_cast_fp16)[name = string("op_7160_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = mul(x = inputs_143_cast_fp16, y = var_7160_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + tensor w_145_to_fp16 = const()[name = string("w_145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283357248)))]; + tensor obj_149_cast_fp16 = mul(x = w_145_to_fp16, y = hidden_states_181_cast_fp16)[name = string("obj_149_cast_fp16")]; + string query_109_pad_type_0 = const()[name = string("query_109_pad_type_0"), val = string("valid")]; + tensor query_109_strides_0 = const()[name = string("query_109_strides_0"), val = tensor([1, 1])]; + tensor query_109_pad_0 = const()[name = string("query_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_109_dilations_0 = const()[name = string("query_109_dilations_0"), val = tensor([1, 1])]; + int32 query_109_groups_0 = const()[name = string("query_109_groups_0"), val = int32(1)]; + tensor layers_18_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283359360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285456576))))[name = string("layers_18_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_109_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_109_dilations_0, groups = query_109_groups_0, pad = query_109_pad_0, pad_type = query_109_pad_type_0, strides = query_109_strides_0, weight = layers_18_self_attn_q_proj_weight_to_fp16_palettized, x = obj_149_cast_fp16)[name = string("query_109_cast_fp16")]; + string current_key_73_pad_type_0 = const()[name = string("current_key_73_pad_type_0"), val = string("valid")]; + tensor current_key_73_strides_0 = const()[name = string("current_key_73_strides_0"), val = tensor([1, 1])]; + tensor current_key_73_pad_0 = const()[name = string("current_key_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_73_dilations_0 = const()[name = string("current_key_73_dilations_0"), val = tensor([1, 1])]; + int32 current_key_73_groups_0 = const()[name = string("current_key_73_groups_0"), val = int32(1)]; + tensor layers_18_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285457152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286505792))))[name = string("layers_18_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_73_cast_fp16 = conv(dilations = current_key_73_dilations_0, groups = current_key_73_groups_0, pad = current_key_73_pad_0, pad_type = current_key_73_pad_type_0, strides = current_key_73_strides_0, weight = layers_18_self_attn_k_proj_weight_to_fp16_palettized, x = obj_149_cast_fp16)[name = string("current_key_73_cast_fp16")]; + string current_value_37_pad_type_0 = const()[name = string("current_value_37_pad_type_0"), val = string("valid")]; + tensor current_value_37_strides_0 = const()[name = string("current_value_37_strides_0"), val = tensor([1, 1])]; + tensor current_value_37_pad_0 = const()[name = string("current_value_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_37_dilations_0 = const()[name = string("current_value_37_dilations_0"), val = tensor([1, 1])]; + int32 current_value_37_groups_0 = const()[name = string("current_value_37_groups_0"), val = int32(1)]; + tensor layers_18_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286506368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287555008))))[name = string("layers_18_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_37_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_37_dilations_0, groups = current_value_37_groups_0, pad = current_value_37_pad_0, pad_type = current_value_37_pad_type_0, strides = current_value_37_strides_0, weight = layers_18_self_attn_v_proj_weight_to_fp16_palettized, x = obj_149_cast_fp16)[name = string("current_value_37_cast_fp16")]; + tensor var_7197 = const()[name = string("op_7197"), val = tensor([16, 128, 1, 1])]; + tensor inputs_145_cast_fp16 = reshape(shape = var_7197, x = query_109_cast_fp16)[name = string("inputs_145_cast_fp16")]; + tensor inputs_sq_147_cast_fp16 = mul(x = inputs_145_cast_fp16, y = inputs_145_cast_fp16)[name = string("inputs_sq_147_cast_fp16")]; + tensor variance_147_axes_0 = const()[name = string("variance_147_axes_0"), val = tensor([1])]; + bool variance_147_keep_dims_0 = const()[name = string("variance_147_keep_dims_0"), val = bool(true)]; + tensor variance_147_cast_fp16 = reduce_mean(axes = variance_147_axes_0, keep_dims = variance_147_keep_dims_0, x = inputs_sq_147_cast_fp16)[name = string("variance_147_cast_fp16")]; + fp16 var_7203_to_fp16 = const()[name = string("op_7203_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7204_cast_fp16 = add(x = variance_147_cast_fp16, y = var_7203_to_fp16)[name = string("op_7204_cast_fp16")]; + fp32 var_7205_epsilon_0 = const()[name = string("op_7205_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7205_cast_fp16 = rsqrt(epsilon = var_7205_epsilon_0, x = var_7204_cast_fp16)[name = string("op_7205_cast_fp16")]; + tensor hidden_states_183_cast_fp16 = mul(x = inputs_145_cast_fp16, y = var_7205_cast_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor w_147_to_fp16 = const()[name = string("w_147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287555584)))]; + tensor query_normed_37_cast_fp16 = mul(x = w_147_to_fp16, y = hidden_states_183_cast_fp16)[name = string("query_normed_37_cast_fp16")]; + tensor var_7213 = const()[name = string("op_7213"), val = tensor([8, 128, 1, 1])]; + tensor inputs_147_cast_fp16 = reshape(shape = var_7213, x = current_key_73_cast_fp16)[name = string("inputs_147_cast_fp16")]; + tensor inputs_sq_149_cast_fp16 = mul(x = inputs_147_cast_fp16, y = inputs_147_cast_fp16)[name = string("inputs_sq_149_cast_fp16")]; + tensor variance_149_axes_0 = const()[name = string("variance_149_axes_0"), val = tensor([1])]; + bool variance_149_keep_dims_0 = const()[name = string("variance_149_keep_dims_0"), val = bool(true)]; + tensor variance_149_cast_fp16 = reduce_mean(axes = variance_149_axes_0, keep_dims = variance_149_keep_dims_0, x = inputs_sq_149_cast_fp16)[name = string("variance_149_cast_fp16")]; + fp16 var_7219_to_fp16 = const()[name = string("op_7219_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7220_cast_fp16 = add(x = variance_149_cast_fp16, y = var_7219_to_fp16)[name = string("op_7220_cast_fp16")]; + fp32 var_7221_epsilon_0 = const()[name = string("op_7221_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7221_cast_fp16 = rsqrt(epsilon = var_7221_epsilon_0, x = var_7220_cast_fp16)[name = string("op_7221_cast_fp16")]; + tensor hidden_states_185_cast_fp16 = mul(x = inputs_147_cast_fp16, y = var_7221_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; + tensor w_149_to_fp16 = const()[name = string("w_149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287555904)))]; + tensor current_key_normed_37_cast_fp16 = mul(x = w_149_to_fp16, y = hidden_states_185_cast_fp16)[name = string("current_key_normed_37_cast_fp16")]; + tensor var_7239 = const()[name = string("op_7239"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_109_cast_fp16 = reshape(shape = var_7239, x = query_normed_37_cast_fp16)[name = string("mh_q_109_cast_fp16")]; + tensor var_7241 = const()[name = string("op_7241"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_73_cast_fp16 = reshape(shape = var_7241, x = current_key_normed_37_cast_fp16)[name = string("mh_k_73_cast_fp16")]; + tensor var_7245_cast_fp16 = mul(x = mh_q_109_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7245_cast_fp16")]; + tensor var_7250_begin_0 = const()[name = string("op_7250_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7250_end_0 = const()[name = string("op_7250_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_7250_end_mask_0 = const()[name = string("op_7250_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_7250_cast_fp16 = slice_by_index(begin = var_7250_begin_0, end = var_7250_end_0, end_mask = var_7250_end_mask_0, x = mh_q_109_cast_fp16)[name = string("op_7250_cast_fp16")]; + tensor var_7256_begin_0 = const()[name = string("op_7256_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7256_end_0 = const()[name = string("op_7256_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_7256_end_mask_0 = const()[name = string("op_7256_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7256_cast_fp16 = slice_by_index(begin = var_7256_begin_0, end = var_7256_end_0, end_mask = var_7256_end_mask_0, x = mh_q_109_cast_fp16)[name = string("op_7256_cast_fp16")]; + fp16 const_431_promoted_to_fp16 = const()[name = string("const_431_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7258_cast_fp16 = mul(x = var_7256_cast_fp16, y = const_431_promoted_to_fp16)[name = string("op_7258_cast_fp16")]; + bool var_7260_interleave_0 = const()[name = string("op_7260_interleave_0"), val = bool(false)]; + tensor var_7260_cast_fp16 = concat(axis = var_7138, interleave = var_7260_interleave_0, values = (var_7258_cast_fp16, var_7250_cast_fp16))[name = string("op_7260_cast_fp16")]; + tensor var_7261_cast_fp16 = mul(x = var_7260_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7261_cast_fp16")]; + tensor mh_q_111_cast_fp16 = add(x = var_7245_cast_fp16, y = var_7261_cast_fp16)[name = string("mh_q_111_cast_fp16")]; + tensor var_7263_cast_fp16 = mul(x = mh_k_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7263_cast_fp16")]; + tensor var_7268_begin_0 = const()[name = string("op_7268_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7268_end_0 = const()[name = string("op_7268_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_7268_end_mask_0 = const()[name = string("op_7268_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_7268_cast_fp16 = slice_by_index(begin = var_7268_begin_0, end = var_7268_end_0, end_mask = var_7268_end_mask_0, x = mh_k_73_cast_fp16)[name = string("op_7268_cast_fp16")]; + tensor var_7274_begin_0 = const()[name = string("op_7274_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7274_end_0 = const()[name = string("op_7274_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_7274_end_mask_0 = const()[name = string("op_7274_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7274_cast_fp16 = slice_by_index(begin = var_7274_begin_0, end = var_7274_end_0, end_mask = var_7274_end_mask_0, x = mh_k_73_cast_fp16)[name = string("op_7274_cast_fp16")]; + fp16 const_434_promoted_to_fp16 = const()[name = string("const_434_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7276_cast_fp16 = mul(x = var_7274_cast_fp16, y = const_434_promoted_to_fp16)[name = string("op_7276_cast_fp16")]; + bool var_7278_interleave_0 = const()[name = string("op_7278_interleave_0"), val = bool(false)]; + tensor var_7278_cast_fp16 = concat(axis = var_7138, interleave = var_7278_interleave_0, values = (var_7276_cast_fp16, var_7268_cast_fp16))[name = string("op_7278_cast_fp16")]; + tensor var_7279_cast_fp16 = mul(x = var_7278_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7279_cast_fp16")]; + tensor mh_k_75_cast_fp16 = add(x = var_7263_cast_fp16, y = var_7279_cast_fp16)[name = string("mh_k_75_cast_fp16")]; + tensor var_7283 = const()[name = string("op_7283"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_75_cast_fp16 = reshape(shape = var_7283, x = mh_k_75_cast_fp16)[name = string("current_key_75_cast_fp16")]; + tensor var_7290_cast_fp16 = mul(x = var_101_cast_fp16_18, y = var_323_cast_fp16)[name = string("op_7290_cast_fp16")]; + tensor var_7291_cast_fp16 = mul(x = current_key_75_cast_fp16, y = var_321_cast_fp16)[name = string("op_7291_cast_fp16")]; + tensor key_111_cast_fp16 = add(x = var_7290_cast_fp16, y = var_7291_cast_fp16)[name = string("key_111_cast_fp16")]; + tensor var_7294_cast_fp16 = mul(x = var_132_cast_fp16_18, y = var_323_cast_fp16)[name = string("op_7294_cast_fp16")]; + tensor var_7295_cast_fp16 = mul(x = current_value_37_cast_fp16, y = var_321_cast_fp16)[name = string("op_7295_cast_fp16")]; + tensor value_73_cast_fp16 = add(x = var_7294_cast_fp16, y = var_7295_cast_fp16)[name = string("value_73_cast_fp16")]; + tensor var_7299 = const()[name = string("op_7299"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_73_cast_fp16 = reshape(shape = var_7299, x = key_111_cast_fp16)[name = string("key_heads_73_cast_fp16")]; + tensor var_7301 = const()[name = string("op_7301"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_73_cast_fp16 = reshape(shape = var_7301, x = value_73_cast_fp16)[name = string("value_heads_73_cast_fp16")]; + tensor var_7304_begin_0 = const()[name = string("op_7304_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7304_end_0 = const()[name = string("op_7304_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7304_end_mask_0 = const()[name = string("op_7304_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7304_cast_fp16 = slice_by_index(begin = var_7304_begin_0, end = var_7304_end_0, end_mask = var_7304_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7304_cast_fp16")]; + tensor var_7308_begin_0 = const()[name = string("op_7308_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7308_end_0 = const()[name = string("op_7308_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7308_end_mask_0 = const()[name = string("op_7308_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7308_cast_fp16 = slice_by_index(begin = var_7308_begin_0, end = var_7308_end_0, end_mask = var_7308_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7308_cast_fp16")]; + tensor var_7320_begin_0 = const()[name = string("op_7320_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_7320_end_0 = const()[name = string("op_7320_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_7320_end_mask_0 = const()[name = string("op_7320_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7320_cast_fp16 = slice_by_index(begin = var_7320_begin_0, end = var_7320_end_0, end_mask = var_7320_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7320_cast_fp16")]; + tensor var_7324_begin_0 = const()[name = string("op_7324_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_7324_end_0 = const()[name = string("op_7324_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_7324_end_mask_0 = const()[name = string("op_7324_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7324_cast_fp16 = slice_by_index(begin = var_7324_begin_0, end = var_7324_end_0, end_mask = var_7324_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7324_cast_fp16")]; + tensor var_7336_begin_0 = const()[name = string("op_7336_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_7336_end_0 = const()[name = string("op_7336_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_7336_end_mask_0 = const()[name = string("op_7336_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7336_cast_fp16 = slice_by_index(begin = var_7336_begin_0, end = var_7336_end_0, end_mask = var_7336_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7336_cast_fp16")]; + tensor var_7340_begin_0 = const()[name = string("op_7340_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_7340_end_0 = const()[name = string("op_7340_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_7340_end_mask_0 = const()[name = string("op_7340_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7340_cast_fp16 = slice_by_index(begin = var_7340_begin_0, end = var_7340_end_0, end_mask = var_7340_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7340_cast_fp16")]; + tensor var_7352_begin_0 = const()[name = string("op_7352_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_7352_end_0 = const()[name = string("op_7352_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_7352_end_mask_0 = const()[name = string("op_7352_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7352_cast_fp16 = slice_by_index(begin = var_7352_begin_0, end = var_7352_end_0, end_mask = var_7352_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7352_cast_fp16")]; + tensor var_7356_begin_0 = const()[name = string("op_7356_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_7356_end_0 = const()[name = string("op_7356_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_7356_end_mask_0 = const()[name = string("op_7356_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7356_cast_fp16 = slice_by_index(begin = var_7356_begin_0, end = var_7356_end_0, end_mask = var_7356_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7356_cast_fp16")]; + tensor var_7368_begin_0 = const()[name = string("op_7368_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_7368_end_0 = const()[name = string("op_7368_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_7368_end_mask_0 = const()[name = string("op_7368_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7368_cast_fp16 = slice_by_index(begin = var_7368_begin_0, end = var_7368_end_0, end_mask = var_7368_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7368_cast_fp16")]; + tensor var_7372_begin_0 = const()[name = string("op_7372_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_7372_end_0 = const()[name = string("op_7372_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_7372_end_mask_0 = const()[name = string("op_7372_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7372_cast_fp16 = slice_by_index(begin = var_7372_begin_0, end = var_7372_end_0, end_mask = var_7372_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7372_cast_fp16")]; + tensor var_7384_begin_0 = const()[name = string("op_7384_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_7384_end_0 = const()[name = string("op_7384_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_7384_end_mask_0 = const()[name = string("op_7384_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7384_cast_fp16 = slice_by_index(begin = var_7384_begin_0, end = var_7384_end_0, end_mask = var_7384_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7384_cast_fp16")]; + tensor var_7388_begin_0 = const()[name = string("op_7388_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_7388_end_0 = const()[name = string("op_7388_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_7388_end_mask_0 = const()[name = string("op_7388_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7388_cast_fp16 = slice_by_index(begin = var_7388_begin_0, end = var_7388_end_0, end_mask = var_7388_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7388_cast_fp16")]; + tensor var_7400_begin_0 = const()[name = string("op_7400_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_7400_end_0 = const()[name = string("op_7400_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_7400_end_mask_0 = const()[name = string("op_7400_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7400_cast_fp16 = slice_by_index(begin = var_7400_begin_0, end = var_7400_end_0, end_mask = var_7400_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7400_cast_fp16")]; + tensor var_7404_begin_0 = const()[name = string("op_7404_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_7404_end_0 = const()[name = string("op_7404_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_7404_end_mask_0 = const()[name = string("op_7404_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7404_cast_fp16 = slice_by_index(begin = var_7404_begin_0, end = var_7404_end_0, end_mask = var_7404_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7404_cast_fp16")]; + tensor var_7416_begin_0 = const()[name = string("op_7416_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_7416_end_0 = const()[name = string("op_7416_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7416_end_mask_0 = const()[name = string("op_7416_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7416_cast_fp16 = slice_by_index(begin = var_7416_begin_0, end = var_7416_end_0, end_mask = var_7416_end_mask_0, x = key_heads_73_cast_fp16)[name = string("op_7416_cast_fp16")]; + tensor var_7420_begin_0 = const()[name = string("op_7420_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_7420_end_0 = const()[name = string("op_7420_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7420_end_mask_0 = const()[name = string("op_7420_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7420_cast_fp16 = slice_by_index(begin = var_7420_begin_0, end = var_7420_end_0, end_mask = var_7420_end_mask_0, x = value_heads_73_cast_fp16)[name = string("op_7420_cast_fp16")]; + bool key_heads_75_interleave_0 = const()[name = string("key_heads_75_interleave_0"), val = bool(false)]; + tensor key_heads_75_cast_fp16 = concat(axis = var_7146, interleave = key_heads_75_interleave_0, values = (var_7304_cast_fp16, var_7304_cast_fp16, var_7320_cast_fp16, var_7320_cast_fp16, var_7336_cast_fp16, var_7336_cast_fp16, var_7352_cast_fp16, var_7352_cast_fp16, var_7368_cast_fp16, var_7368_cast_fp16, var_7384_cast_fp16, var_7384_cast_fp16, var_7400_cast_fp16, var_7400_cast_fp16, var_7416_cast_fp16, var_7416_cast_fp16))[name = string("key_heads_75_cast_fp16")]; + bool value_heads_75_interleave_0 = const()[name = string("value_heads_75_interleave_0"), val = bool(false)]; + tensor value_heads_75_cast_fp16 = concat(axis = var_7146, interleave = value_heads_75_interleave_0, values = (var_7308_cast_fp16, var_7308_cast_fp16, var_7324_cast_fp16, var_7324_cast_fp16, var_7340_cast_fp16, var_7340_cast_fp16, var_7356_cast_fp16, var_7356_cast_fp16, var_7372_cast_fp16, var_7372_cast_fp16, var_7388_cast_fp16, var_7388_cast_fp16, var_7404_cast_fp16, var_7404_cast_fp16, var_7420_cast_fp16, var_7420_cast_fp16))[name = string("value_heads_75_cast_fp16")]; + fp16 var_7443_to_fp16 = const()[name = string("op_7443_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_7444_cast_fp16 = mul(x = mh_q_111_cast_fp16, y = var_7443_to_fp16)[name = string("op_7444_cast_fp16")]; + bool mh_w_73_transpose_x_0 = const()[name = string("mh_w_73_transpose_x_0"), val = bool(true)]; + bool mh_w_73_transpose_y_0 = const()[name = string("mh_w_73_transpose_y_0"), val = bool(false)]; + tensor mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_7444_cast_fp16, y = key_heads_75_cast_fp16)[name = string("mh_w_73_cast_fp16")]; + tensor mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_75_cast_fp16")]; + tensor var_7456_cast_fp16 = softmax(axis = var_7128, x = mh_w_75_cast_fp16)[name = string("op_7456_cast_fp16")]; + bool attn_37_transpose_x_0 = const()[name = string("attn_37_transpose_x_0"), val = bool(false)]; + bool attn_37_transpose_y_0 = const()[name = string("attn_37_transpose_y_0"), val = bool(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = value_heads_75_cast_fp16, y = var_7456_cast_fp16)[name = string("attn_37_cast_fp16")]; + tensor var_7461 = const()[name = string("op_7461"), val = tensor([1, -1, 1, 1])]; + tensor input_145_cast_fp16 = reshape(shape = var_7461, x = attn_37_cast_fp16)[name = string("input_145_cast_fp16")]; + string obj_155_pad_type_0 = const()[name = string("obj_155_pad_type_0"), val = string("valid")]; + tensor obj_155_strides_0 = const()[name = string("obj_155_strides_0"), val = tensor([1, 1])]; + tensor obj_155_pad_0 = const()[name = string("obj_155_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_155_dilations_0 = const()[name = string("obj_155_dilations_0"), val = tensor([1, 1])]; + int32 obj_155_groups_0 = const()[name = string("obj_155_groups_0"), val = int32(1)]; + tensor layers_18_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287556224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289653440))))[name = string("layers_18_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_155_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_155_dilations_0, groups = obj_155_groups_0, pad = obj_155_pad_0, pad_type = obj_155_pad_type_0, strides = obj_155_strides_0, weight = layers_18_self_attn_o_proj_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = string("obj_155_cast_fp16")]; + tensor inputs_149_cast_fp16 = add(x = inputs_143_cast_fp16, y = obj_155_cast_fp16)[name = string("inputs_149_cast_fp16")]; + tensor inputs_sq_151_cast_fp16 = mul(x = inputs_149_cast_fp16, y = inputs_149_cast_fp16)[name = string("inputs_sq_151_cast_fp16")]; + tensor variance_151_axes_0 = const()[name = string("variance_151_axes_0"), val = tensor([1])]; + bool variance_151_keep_dims_0 = const()[name = string("variance_151_keep_dims_0"), val = bool(true)]; + tensor variance_151_cast_fp16 = reduce_mean(axes = variance_151_axes_0, keep_dims = variance_151_keep_dims_0, x = inputs_sq_151_cast_fp16)[name = string("variance_151_cast_fp16")]; + fp16 var_7479_to_fp16 = const()[name = string("op_7479_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7480_cast_fp16 = add(x = variance_151_cast_fp16, y = var_7479_to_fp16)[name = string("op_7480_cast_fp16")]; + fp32 var_7481_epsilon_0 = const()[name = string("op_7481_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7481_cast_fp16 = rsqrt(epsilon = var_7481_epsilon_0, x = var_7480_cast_fp16)[name = string("op_7481_cast_fp16")]; + tensor hidden_states_187_cast_fp16 = mul(x = inputs_149_cast_fp16, y = var_7481_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; + tensor w_151_to_fp16 = const()[name = string("w_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289654016)))]; + tensor input_147_cast_fp16 = mul(x = w_151_to_fp16, y = hidden_states_187_cast_fp16)[name = string("input_147_cast_fp16")]; + string input_149_pad_type_0 = const()[name = string("input_149_pad_type_0"), val = string("valid")]; + tensor input_149_strides_0 = const()[name = string("input_149_strides_0"), val = tensor([1, 1])]; + tensor input_149_pad_0 = const()[name = string("input_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_149_dilations_0 = const()[name = string("input_149_dilations_0"), val = tensor([1, 1])]; + int32 input_149_groups_0 = const()[name = string("input_149_groups_0"), val = int32(1)]; + tensor layers_18_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289656128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292801920))))[name = string("layers_18_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_149_cast_fp16 = conv(dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = layers_18_mlp_gate_proj_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = string("input_149_cast_fp16")]; + tensor var_7495_cast_fp16 = silu(x = input_149_cast_fp16)[name = string("op_7495_cast_fp16")]; + string var_7501_pad_type_0 = const()[name = string("op_7501_pad_type_0"), val = string("valid")]; + tensor var_7501_strides_0 = const()[name = string("op_7501_strides_0"), val = tensor([1, 1])]; + tensor var_7501_pad_0 = const()[name = string("op_7501_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7501_dilations_0 = const()[name = string("op_7501_dilations_0"), val = tensor([1, 1])]; + int32 var_7501_groups_0 = const()[name = string("op_7501_groups_0"), val = int32(1)]; + tensor layers_18_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292802496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295948288))))[name = string("layers_18_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_7501_cast_fp16 = conv(dilations = var_7501_dilations_0, groups = var_7501_groups_0, pad = var_7501_pad_0, pad_type = var_7501_pad_type_0, strides = var_7501_strides_0, weight = layers_18_mlp_up_proj_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = string("op_7501_cast_fp16")]; + tensor input_151_cast_fp16 = mul(x = var_7495_cast_fp16, y = var_7501_cast_fp16)[name = string("input_151_cast_fp16")]; + string hidden_states_189_pad_type_0 = const()[name = string("hidden_states_189_pad_type_0"), val = string("valid")]; + tensor hidden_states_189_strides_0 = const()[name = string("hidden_states_189_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_189_pad_0 = const()[name = string("hidden_states_189_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_189_dilations_0 = const()[name = string("hidden_states_189_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_189_groups_0 = const()[name = string("hidden_states_189_groups_0"), val = int32(1)]; + tensor layers_18_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295948864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299094656))))[name = string("layers_18_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_189_cast_fp16 = conv(dilations = hidden_states_189_dilations_0, groups = hidden_states_189_groups_0, pad = hidden_states_189_pad_0, pad_type = hidden_states_189_pad_type_0, strides = hidden_states_189_strides_0, weight = layers_18_mlp_down_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("hidden_states_189_cast_fp16")]; + tensor inputs_151_cast_fp16 = add(x = inputs_149_cast_fp16, y = hidden_states_189_cast_fp16)[name = string("inputs_151_cast_fp16")]; + int32 var_7515 = const()[name = string("op_7515"), val = int32(3)]; + int32 var_7525 = const()[name = string("op_7525"), val = int32(-2)]; + int32 var_7533 = const()[name = string("op_7533"), val = int32(1)]; + tensor inputs_sq_153_cast_fp16 = mul(x = inputs_151_cast_fp16, y = inputs_151_cast_fp16)[name = string("inputs_sq_153_cast_fp16")]; + tensor variance_153_axes_0 = const()[name = string("variance_153_axes_0"), val = tensor([1])]; + bool variance_153_keep_dims_0 = const()[name = string("variance_153_keep_dims_0"), val = bool(true)]; + tensor variance_153_cast_fp16 = reduce_mean(axes = variance_153_axes_0, keep_dims = variance_153_keep_dims_0, x = inputs_sq_153_cast_fp16)[name = string("variance_153_cast_fp16")]; + fp16 var_7545_to_fp16 = const()[name = string("op_7545_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7546_cast_fp16 = add(x = variance_153_cast_fp16, y = var_7545_to_fp16)[name = string("op_7546_cast_fp16")]; + fp32 var_7547_epsilon_0 = const()[name = string("op_7547_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7547_cast_fp16 = rsqrt(epsilon = var_7547_epsilon_0, x = var_7546_cast_fp16)[name = string("op_7547_cast_fp16")]; + tensor hidden_states_191_cast_fp16 = mul(x = inputs_151_cast_fp16, y = var_7547_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + tensor w_153_to_fp16 = const()[name = string("w_153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299095232)))]; + tensor obj_157_cast_fp16 = mul(x = w_153_to_fp16, y = hidden_states_191_cast_fp16)[name = string("obj_157_cast_fp16")]; + string query_115_pad_type_0 = const()[name = string("query_115_pad_type_0"), val = string("valid")]; + tensor query_115_strides_0 = const()[name = string("query_115_strides_0"), val = tensor([1, 1])]; + tensor query_115_pad_0 = const()[name = string("query_115_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_115_dilations_0 = const()[name = string("query_115_dilations_0"), val = tensor([1, 1])]; + int32 query_115_groups_0 = const()[name = string("query_115_groups_0"), val = int32(1)]; + tensor layers_19_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299097344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301194560))))[name = string("layers_19_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_115_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_115_dilations_0, groups = query_115_groups_0, pad = query_115_pad_0, pad_type = query_115_pad_type_0, strides = query_115_strides_0, weight = layers_19_self_attn_q_proj_weight_to_fp16_palettized, x = obj_157_cast_fp16)[name = string("query_115_cast_fp16")]; + string current_key_77_pad_type_0 = const()[name = string("current_key_77_pad_type_0"), val = string("valid")]; + tensor current_key_77_strides_0 = const()[name = string("current_key_77_strides_0"), val = tensor([1, 1])]; + tensor current_key_77_pad_0 = const()[name = string("current_key_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_77_dilations_0 = const()[name = string("current_key_77_dilations_0"), val = tensor([1, 1])]; + int32 current_key_77_groups_0 = const()[name = string("current_key_77_groups_0"), val = int32(1)]; + tensor layers_19_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301195136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302243776))))[name = string("layers_19_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_77_cast_fp16 = conv(dilations = current_key_77_dilations_0, groups = current_key_77_groups_0, pad = current_key_77_pad_0, pad_type = current_key_77_pad_type_0, strides = current_key_77_strides_0, weight = layers_19_self_attn_k_proj_weight_to_fp16_palettized, x = obj_157_cast_fp16)[name = string("current_key_77_cast_fp16")]; + string current_value_39_pad_type_0 = const()[name = string("current_value_39_pad_type_0"), val = string("valid")]; + tensor current_value_39_strides_0 = const()[name = string("current_value_39_strides_0"), val = tensor([1, 1])]; + tensor current_value_39_pad_0 = const()[name = string("current_value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_39_dilations_0 = const()[name = string("current_value_39_dilations_0"), val = tensor([1, 1])]; + int32 current_value_39_groups_0 = const()[name = string("current_value_39_groups_0"), val = int32(1)]; + tensor layers_19_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302244352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303292992))))[name = string("layers_19_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_39_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_39_dilations_0, groups = current_value_39_groups_0, pad = current_value_39_pad_0, pad_type = current_value_39_pad_type_0, strides = current_value_39_strides_0, weight = layers_19_self_attn_v_proj_weight_to_fp16_palettized, x = obj_157_cast_fp16)[name = string("current_value_39_cast_fp16")]; + tensor var_7584 = const()[name = string("op_7584"), val = tensor([16, 128, 1, 1])]; + tensor inputs_153_cast_fp16 = reshape(shape = var_7584, x = query_115_cast_fp16)[name = string("inputs_153_cast_fp16")]; + tensor inputs_sq_155_cast_fp16 = mul(x = inputs_153_cast_fp16, y = inputs_153_cast_fp16)[name = string("inputs_sq_155_cast_fp16")]; + tensor variance_155_axes_0 = const()[name = string("variance_155_axes_0"), val = tensor([1])]; + bool variance_155_keep_dims_0 = const()[name = string("variance_155_keep_dims_0"), val = bool(true)]; + tensor variance_155_cast_fp16 = reduce_mean(axes = variance_155_axes_0, keep_dims = variance_155_keep_dims_0, x = inputs_sq_155_cast_fp16)[name = string("variance_155_cast_fp16")]; + fp16 var_7590_to_fp16 = const()[name = string("op_7590_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7591_cast_fp16 = add(x = variance_155_cast_fp16, y = var_7590_to_fp16)[name = string("op_7591_cast_fp16")]; + fp32 var_7592_epsilon_0 = const()[name = string("op_7592_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7592_cast_fp16 = rsqrt(epsilon = var_7592_epsilon_0, x = var_7591_cast_fp16)[name = string("op_7592_cast_fp16")]; + tensor hidden_states_193_cast_fp16 = mul(x = inputs_153_cast_fp16, y = var_7592_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; + tensor w_155_to_fp16 = const()[name = string("w_155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303293568)))]; + tensor query_normed_39_cast_fp16 = mul(x = w_155_to_fp16, y = hidden_states_193_cast_fp16)[name = string("query_normed_39_cast_fp16")]; + tensor var_7600 = const()[name = string("op_7600"), val = tensor([8, 128, 1, 1])]; + tensor inputs_155_cast_fp16 = reshape(shape = var_7600, x = current_key_77_cast_fp16)[name = string("inputs_155_cast_fp16")]; + tensor inputs_sq_157_cast_fp16 = mul(x = inputs_155_cast_fp16, y = inputs_155_cast_fp16)[name = string("inputs_sq_157_cast_fp16")]; + tensor variance_157_axes_0 = const()[name = string("variance_157_axes_0"), val = tensor([1])]; + bool variance_157_keep_dims_0 = const()[name = string("variance_157_keep_dims_0"), val = bool(true)]; + tensor variance_157_cast_fp16 = reduce_mean(axes = variance_157_axes_0, keep_dims = variance_157_keep_dims_0, x = inputs_sq_157_cast_fp16)[name = string("variance_157_cast_fp16")]; + fp16 var_7606_to_fp16 = const()[name = string("op_7606_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7607_cast_fp16 = add(x = variance_157_cast_fp16, y = var_7606_to_fp16)[name = string("op_7607_cast_fp16")]; + fp32 var_7608_epsilon_0 = const()[name = string("op_7608_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7608_cast_fp16 = rsqrt(epsilon = var_7608_epsilon_0, x = var_7607_cast_fp16)[name = string("op_7608_cast_fp16")]; + tensor hidden_states_195_cast_fp16 = mul(x = inputs_155_cast_fp16, y = var_7608_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor w_157_to_fp16 = const()[name = string("w_157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303293888)))]; + tensor current_key_normed_39_cast_fp16 = mul(x = w_157_to_fp16, y = hidden_states_195_cast_fp16)[name = string("current_key_normed_39_cast_fp16")]; + tensor var_7626 = const()[name = string("op_7626"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_115_cast_fp16 = reshape(shape = var_7626, x = query_normed_39_cast_fp16)[name = string("mh_q_115_cast_fp16")]; + tensor var_7628 = const()[name = string("op_7628"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_77_cast_fp16 = reshape(shape = var_7628, x = current_key_normed_39_cast_fp16)[name = string("mh_k_77_cast_fp16")]; + tensor var_7632_cast_fp16 = mul(x = mh_q_115_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7632_cast_fp16")]; + tensor var_7637_begin_0 = const()[name = string("op_7637_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7637_end_0 = const()[name = string("op_7637_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_7637_end_mask_0 = const()[name = string("op_7637_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_7637_cast_fp16 = slice_by_index(begin = var_7637_begin_0, end = var_7637_end_0, end_mask = var_7637_end_mask_0, x = mh_q_115_cast_fp16)[name = string("op_7637_cast_fp16")]; + tensor var_7643_begin_0 = const()[name = string("op_7643_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7643_end_0 = const()[name = string("op_7643_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_7643_end_mask_0 = const()[name = string("op_7643_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7643_cast_fp16 = slice_by_index(begin = var_7643_begin_0, end = var_7643_end_0, end_mask = var_7643_end_mask_0, x = mh_q_115_cast_fp16)[name = string("op_7643_cast_fp16")]; + fp16 const_454_promoted_to_fp16 = const()[name = string("const_454_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7645_cast_fp16 = mul(x = var_7643_cast_fp16, y = const_454_promoted_to_fp16)[name = string("op_7645_cast_fp16")]; + bool var_7647_interleave_0 = const()[name = string("op_7647_interleave_0"), val = bool(false)]; + tensor var_7647_cast_fp16 = concat(axis = var_7525, interleave = var_7647_interleave_0, values = (var_7645_cast_fp16, var_7637_cast_fp16))[name = string("op_7647_cast_fp16")]; + tensor var_7648_cast_fp16 = mul(x = var_7647_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7648_cast_fp16")]; + tensor mh_q_117_cast_fp16 = add(x = var_7632_cast_fp16, y = var_7648_cast_fp16)[name = string("mh_q_117_cast_fp16")]; + tensor var_7650_cast_fp16 = mul(x = mh_k_77_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7650_cast_fp16")]; + tensor var_7655_begin_0 = const()[name = string("op_7655_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7655_end_0 = const()[name = string("op_7655_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_7655_end_mask_0 = const()[name = string("op_7655_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_7655_cast_fp16 = slice_by_index(begin = var_7655_begin_0, end = var_7655_end_0, end_mask = var_7655_end_mask_0, x = mh_k_77_cast_fp16)[name = string("op_7655_cast_fp16")]; + tensor var_7661_begin_0 = const()[name = string("op_7661_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_7661_end_0 = const()[name = string("op_7661_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_7661_end_mask_0 = const()[name = string("op_7661_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7661_cast_fp16 = slice_by_index(begin = var_7661_begin_0, end = var_7661_end_0, end_mask = var_7661_end_mask_0, x = mh_k_77_cast_fp16)[name = string("op_7661_cast_fp16")]; + fp16 const_457_promoted_to_fp16 = const()[name = string("const_457_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7663_cast_fp16 = mul(x = var_7661_cast_fp16, y = const_457_promoted_to_fp16)[name = string("op_7663_cast_fp16")]; + bool var_7665_interleave_0 = const()[name = string("op_7665_interleave_0"), val = bool(false)]; + tensor var_7665_cast_fp16 = concat(axis = var_7525, interleave = var_7665_interleave_0, values = (var_7663_cast_fp16, var_7655_cast_fp16))[name = string("op_7665_cast_fp16")]; + tensor var_7666_cast_fp16 = mul(x = var_7665_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7666_cast_fp16")]; + tensor mh_k_79_cast_fp16 = add(x = var_7650_cast_fp16, y = var_7666_cast_fp16)[name = string("mh_k_79_cast_fp16")]; + tensor var_7670 = const()[name = string("op_7670"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_79_cast_fp16 = reshape(shape = var_7670, x = mh_k_79_cast_fp16)[name = string("current_key_79_cast_fp16")]; + tensor var_7677_cast_fp16 = mul(x = var_101_cast_fp16_19, y = var_323_cast_fp16)[name = string("op_7677_cast_fp16")]; + tensor var_7678_cast_fp16 = mul(x = current_key_79_cast_fp16, y = var_321_cast_fp16)[name = string("op_7678_cast_fp16")]; + tensor key_117_cast_fp16 = add(x = var_7677_cast_fp16, y = var_7678_cast_fp16)[name = string("key_117_cast_fp16")]; + tensor var_7681_cast_fp16 = mul(x = var_132_cast_fp16_19, y = var_323_cast_fp16)[name = string("op_7681_cast_fp16")]; + tensor var_7682_cast_fp16 = mul(x = current_value_39_cast_fp16, y = var_321_cast_fp16)[name = string("op_7682_cast_fp16")]; + tensor value_77_cast_fp16 = add(x = var_7681_cast_fp16, y = var_7682_cast_fp16)[name = string("value_77_cast_fp16")]; + tensor var_7686 = const()[name = string("op_7686"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_77_cast_fp16 = reshape(shape = var_7686, x = key_117_cast_fp16)[name = string("key_heads_77_cast_fp16")]; + tensor var_7688 = const()[name = string("op_7688"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_77_cast_fp16 = reshape(shape = var_7688, x = value_77_cast_fp16)[name = string("value_heads_77_cast_fp16")]; + tensor var_7691_begin_0 = const()[name = string("op_7691_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7691_end_0 = const()[name = string("op_7691_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7691_end_mask_0 = const()[name = string("op_7691_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7691_cast_fp16 = slice_by_index(begin = var_7691_begin_0, end = var_7691_end_0, end_mask = var_7691_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7691_cast_fp16")]; + tensor var_7695_begin_0 = const()[name = string("op_7695_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7695_end_0 = const()[name = string("op_7695_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7695_end_mask_0 = const()[name = string("op_7695_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7695_cast_fp16 = slice_by_index(begin = var_7695_begin_0, end = var_7695_end_0, end_mask = var_7695_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7695_cast_fp16")]; + tensor var_7707_begin_0 = const()[name = string("op_7707_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_7707_end_0 = const()[name = string("op_7707_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_7707_end_mask_0 = const()[name = string("op_7707_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7707_cast_fp16 = slice_by_index(begin = var_7707_begin_0, end = var_7707_end_0, end_mask = var_7707_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7707_cast_fp16")]; + tensor var_7711_begin_0 = const()[name = string("op_7711_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_7711_end_0 = const()[name = string("op_7711_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_7711_end_mask_0 = const()[name = string("op_7711_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7711_cast_fp16 = slice_by_index(begin = var_7711_begin_0, end = var_7711_end_0, end_mask = var_7711_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7711_cast_fp16")]; + tensor var_7723_begin_0 = const()[name = string("op_7723_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_7723_end_0 = const()[name = string("op_7723_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_7723_end_mask_0 = const()[name = string("op_7723_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7723_cast_fp16 = slice_by_index(begin = var_7723_begin_0, end = var_7723_end_0, end_mask = var_7723_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7723_cast_fp16")]; + tensor var_7727_begin_0 = const()[name = string("op_7727_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_7727_end_0 = const()[name = string("op_7727_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_7727_end_mask_0 = const()[name = string("op_7727_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7727_cast_fp16 = slice_by_index(begin = var_7727_begin_0, end = var_7727_end_0, end_mask = var_7727_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7727_cast_fp16")]; + tensor var_7739_begin_0 = const()[name = string("op_7739_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_7739_end_0 = const()[name = string("op_7739_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_7739_end_mask_0 = const()[name = string("op_7739_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7739_cast_fp16 = slice_by_index(begin = var_7739_begin_0, end = var_7739_end_0, end_mask = var_7739_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7739_cast_fp16")]; + tensor var_7743_begin_0 = const()[name = string("op_7743_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_7743_end_0 = const()[name = string("op_7743_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_7743_end_mask_0 = const()[name = string("op_7743_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7743_cast_fp16 = slice_by_index(begin = var_7743_begin_0, end = var_7743_end_0, end_mask = var_7743_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7743_cast_fp16")]; + tensor var_7755_begin_0 = const()[name = string("op_7755_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_7755_end_0 = const()[name = string("op_7755_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_7755_end_mask_0 = const()[name = string("op_7755_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7755_cast_fp16 = slice_by_index(begin = var_7755_begin_0, end = var_7755_end_0, end_mask = var_7755_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7755_cast_fp16")]; + tensor var_7759_begin_0 = const()[name = string("op_7759_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_7759_end_0 = const()[name = string("op_7759_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_7759_end_mask_0 = const()[name = string("op_7759_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7759_cast_fp16 = slice_by_index(begin = var_7759_begin_0, end = var_7759_end_0, end_mask = var_7759_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7759_cast_fp16")]; + tensor var_7771_begin_0 = const()[name = string("op_7771_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_7771_end_0 = const()[name = string("op_7771_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_7771_end_mask_0 = const()[name = string("op_7771_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7771_cast_fp16 = slice_by_index(begin = var_7771_begin_0, end = var_7771_end_0, end_mask = var_7771_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7771_cast_fp16")]; + tensor var_7775_begin_0 = const()[name = string("op_7775_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_7775_end_0 = const()[name = string("op_7775_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_7775_end_mask_0 = const()[name = string("op_7775_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7775_cast_fp16 = slice_by_index(begin = var_7775_begin_0, end = var_7775_end_0, end_mask = var_7775_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7775_cast_fp16")]; + tensor var_7787_begin_0 = const()[name = string("op_7787_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_7787_end_0 = const()[name = string("op_7787_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_7787_end_mask_0 = const()[name = string("op_7787_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7787_cast_fp16 = slice_by_index(begin = var_7787_begin_0, end = var_7787_end_0, end_mask = var_7787_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7787_cast_fp16")]; + tensor var_7791_begin_0 = const()[name = string("op_7791_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_7791_end_0 = const()[name = string("op_7791_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_7791_end_mask_0 = const()[name = string("op_7791_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7791_cast_fp16 = slice_by_index(begin = var_7791_begin_0, end = var_7791_end_0, end_mask = var_7791_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7791_cast_fp16")]; + tensor var_7803_begin_0 = const()[name = string("op_7803_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_7803_end_0 = const()[name = string("op_7803_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7803_end_mask_0 = const()[name = string("op_7803_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7803_cast_fp16 = slice_by_index(begin = var_7803_begin_0, end = var_7803_end_0, end_mask = var_7803_end_mask_0, x = key_heads_77_cast_fp16)[name = string("op_7803_cast_fp16")]; + tensor var_7807_begin_0 = const()[name = string("op_7807_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_7807_end_0 = const()[name = string("op_7807_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_7807_end_mask_0 = const()[name = string("op_7807_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7807_cast_fp16 = slice_by_index(begin = var_7807_begin_0, end = var_7807_end_0, end_mask = var_7807_end_mask_0, x = value_heads_77_cast_fp16)[name = string("op_7807_cast_fp16")]; + bool key_heads_79_interleave_0 = const()[name = string("key_heads_79_interleave_0"), val = bool(false)]; + tensor key_heads_79_cast_fp16 = concat(axis = var_7533, interleave = key_heads_79_interleave_0, values = (var_7691_cast_fp16, var_7691_cast_fp16, var_7707_cast_fp16, var_7707_cast_fp16, var_7723_cast_fp16, var_7723_cast_fp16, var_7739_cast_fp16, var_7739_cast_fp16, var_7755_cast_fp16, var_7755_cast_fp16, var_7771_cast_fp16, var_7771_cast_fp16, var_7787_cast_fp16, var_7787_cast_fp16, var_7803_cast_fp16, var_7803_cast_fp16))[name = string("key_heads_79_cast_fp16")]; + bool value_heads_79_interleave_0 = const()[name = string("value_heads_79_interleave_0"), val = bool(false)]; + tensor value_heads_79_cast_fp16 = concat(axis = var_7533, interleave = value_heads_79_interleave_0, values = (var_7695_cast_fp16, var_7695_cast_fp16, var_7711_cast_fp16, var_7711_cast_fp16, var_7727_cast_fp16, var_7727_cast_fp16, var_7743_cast_fp16, var_7743_cast_fp16, var_7759_cast_fp16, var_7759_cast_fp16, var_7775_cast_fp16, var_7775_cast_fp16, var_7791_cast_fp16, var_7791_cast_fp16, var_7807_cast_fp16, var_7807_cast_fp16))[name = string("value_heads_79_cast_fp16")]; + fp16 var_7830_to_fp16 = const()[name = string("op_7830_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_7831_cast_fp16 = mul(x = mh_q_117_cast_fp16, y = var_7830_to_fp16)[name = string("op_7831_cast_fp16")]; + bool mh_w_77_transpose_x_0 = const()[name = string("mh_w_77_transpose_x_0"), val = bool(true)]; + bool mh_w_77_transpose_y_0 = const()[name = string("mh_w_77_transpose_y_0"), val = bool(false)]; + tensor mh_w_77_cast_fp16 = matmul(transpose_x = mh_w_77_transpose_x_0, transpose_y = mh_w_77_transpose_y_0, x = var_7831_cast_fp16, y = key_heads_79_cast_fp16)[name = string("mh_w_77_cast_fp16")]; + tensor mh_w_79_cast_fp16 = add(x = mh_w_77_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_79_cast_fp16")]; + tensor var_7843_cast_fp16 = softmax(axis = var_7515, x = mh_w_79_cast_fp16)[name = string("op_7843_cast_fp16")]; + bool attn_39_transpose_x_0 = const()[name = string("attn_39_transpose_x_0"), val = bool(false)]; + bool attn_39_transpose_y_0 = const()[name = string("attn_39_transpose_y_0"), val = bool(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = value_heads_79_cast_fp16, y = var_7843_cast_fp16)[name = string("attn_39_cast_fp16")]; + tensor var_7848 = const()[name = string("op_7848"), val = tensor([1, -1, 1, 1])]; + tensor input_153_cast_fp16 = reshape(shape = var_7848, x = attn_39_cast_fp16)[name = string("input_153_cast_fp16")]; + string obj_163_pad_type_0 = const()[name = string("obj_163_pad_type_0"), val = string("valid")]; + tensor obj_163_strides_0 = const()[name = string("obj_163_strides_0"), val = tensor([1, 1])]; + tensor obj_163_pad_0 = const()[name = string("obj_163_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_163_dilations_0 = const()[name = string("obj_163_dilations_0"), val = tensor([1, 1])]; + int32 obj_163_groups_0 = const()[name = string("obj_163_groups_0"), val = int32(1)]; + tensor layers_19_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303294208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305391424))))[name = string("layers_19_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_163_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_163_dilations_0, groups = obj_163_groups_0, pad = obj_163_pad_0, pad_type = obj_163_pad_type_0, strides = obj_163_strides_0, weight = layers_19_self_attn_o_proj_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = string("obj_163_cast_fp16")]; + tensor inputs_157_cast_fp16 = add(x = inputs_151_cast_fp16, y = obj_163_cast_fp16)[name = string("inputs_157_cast_fp16")]; + tensor inputs_sq_159_cast_fp16 = mul(x = inputs_157_cast_fp16, y = inputs_157_cast_fp16)[name = string("inputs_sq_159_cast_fp16")]; + tensor variance_159_axes_0 = const()[name = string("variance_159_axes_0"), val = tensor([1])]; + bool variance_159_keep_dims_0 = const()[name = string("variance_159_keep_dims_0"), val = bool(true)]; + tensor variance_159_cast_fp16 = reduce_mean(axes = variance_159_axes_0, keep_dims = variance_159_keep_dims_0, x = inputs_sq_159_cast_fp16)[name = string("variance_159_cast_fp16")]; + fp16 var_7866_to_fp16 = const()[name = string("op_7866_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7867_cast_fp16 = add(x = variance_159_cast_fp16, y = var_7866_to_fp16)[name = string("op_7867_cast_fp16")]; + fp32 var_7868_epsilon_0 = const()[name = string("op_7868_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7868_cast_fp16 = rsqrt(epsilon = var_7868_epsilon_0, x = var_7867_cast_fp16)[name = string("op_7868_cast_fp16")]; + tensor hidden_states_197_cast_fp16 = mul(x = inputs_157_cast_fp16, y = var_7868_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + tensor w_159_to_fp16 = const()[name = string("w_159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305392000)))]; + tensor input_155_cast_fp16 = mul(x = w_159_to_fp16, y = hidden_states_197_cast_fp16)[name = string("input_155_cast_fp16")]; + string input_157_pad_type_0 = const()[name = string("input_157_pad_type_0"), val = string("valid")]; + tensor input_157_strides_0 = const()[name = string("input_157_strides_0"), val = tensor([1, 1])]; + tensor input_157_pad_0 = const()[name = string("input_157_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_157_dilations_0 = const()[name = string("input_157_dilations_0"), val = tensor([1, 1])]; + int32 input_157_groups_0 = const()[name = string("input_157_groups_0"), val = int32(1)]; + tensor layers_19_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305394112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308539904))))[name = string("layers_19_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_157_cast_fp16 = conv(dilations = input_157_dilations_0, groups = input_157_groups_0, pad = input_157_pad_0, pad_type = input_157_pad_type_0, strides = input_157_strides_0, weight = layers_19_mlp_gate_proj_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("input_157_cast_fp16")]; + tensor var_7882_cast_fp16 = silu(x = input_157_cast_fp16)[name = string("op_7882_cast_fp16")]; + string var_7888_pad_type_0 = const()[name = string("op_7888_pad_type_0"), val = string("valid")]; + tensor var_7888_strides_0 = const()[name = string("op_7888_strides_0"), val = tensor([1, 1])]; + tensor var_7888_pad_0 = const()[name = string("op_7888_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7888_dilations_0 = const()[name = string("op_7888_dilations_0"), val = tensor([1, 1])]; + int32 var_7888_groups_0 = const()[name = string("op_7888_groups_0"), val = int32(1)]; + tensor layers_19_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308540480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311686272))))[name = string("layers_19_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_7888_cast_fp16 = conv(dilations = var_7888_dilations_0, groups = var_7888_groups_0, pad = var_7888_pad_0, pad_type = var_7888_pad_type_0, strides = var_7888_strides_0, weight = layers_19_mlp_up_proj_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("op_7888_cast_fp16")]; + tensor input_159_cast_fp16 = mul(x = var_7882_cast_fp16, y = var_7888_cast_fp16)[name = string("input_159_cast_fp16")]; + string hidden_states_199_pad_type_0 = const()[name = string("hidden_states_199_pad_type_0"), val = string("valid")]; + tensor hidden_states_199_strides_0 = const()[name = string("hidden_states_199_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_199_pad_0 = const()[name = string("hidden_states_199_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_199_dilations_0 = const()[name = string("hidden_states_199_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_199_groups_0 = const()[name = string("hidden_states_199_groups_0"), val = int32(1)]; + tensor layers_19_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311686848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314832640))))[name = string("layers_19_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_199_cast_fp16 = conv(dilations = hidden_states_199_dilations_0, groups = hidden_states_199_groups_0, pad = hidden_states_199_pad_0, pad_type = hidden_states_199_pad_type_0, strides = hidden_states_199_strides_0, weight = layers_19_mlp_down_proj_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; + tensor inputs_159_cast_fp16 = add(x = inputs_157_cast_fp16, y = hidden_states_199_cast_fp16)[name = string("inputs_159_cast_fp16")]; + int32 var_7902 = const()[name = string("op_7902"), val = int32(3)]; + int32 var_7912 = const()[name = string("op_7912"), val = int32(-2)]; + int32 var_7920 = const()[name = string("op_7920"), val = int32(1)]; + tensor inputs_sq_161_cast_fp16 = mul(x = inputs_159_cast_fp16, y = inputs_159_cast_fp16)[name = string("inputs_sq_161_cast_fp16")]; + tensor variance_161_axes_0 = const()[name = string("variance_161_axes_0"), val = tensor([1])]; + bool variance_161_keep_dims_0 = const()[name = string("variance_161_keep_dims_0"), val = bool(true)]; + tensor variance_161_cast_fp16 = reduce_mean(axes = variance_161_axes_0, keep_dims = variance_161_keep_dims_0, x = inputs_sq_161_cast_fp16)[name = string("variance_161_cast_fp16")]; + fp16 var_7932_to_fp16 = const()[name = string("op_7932_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7933_cast_fp16 = add(x = variance_161_cast_fp16, y = var_7932_to_fp16)[name = string("op_7933_cast_fp16")]; + fp32 var_7934_epsilon_0 = const()[name = string("op_7934_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7934_cast_fp16 = rsqrt(epsilon = var_7934_epsilon_0, x = var_7933_cast_fp16)[name = string("op_7934_cast_fp16")]; + tensor hidden_states_201_cast_fp16 = mul(x = inputs_159_cast_fp16, y = var_7934_cast_fp16)[name = string("hidden_states_201_cast_fp16")]; + tensor w_161_to_fp16 = const()[name = string("w_161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314833216)))]; + tensor obj_165_cast_fp16 = mul(x = w_161_to_fp16, y = hidden_states_201_cast_fp16)[name = string("obj_165_cast_fp16")]; + string query_121_pad_type_0 = const()[name = string("query_121_pad_type_0"), val = string("valid")]; + tensor query_121_strides_0 = const()[name = string("query_121_strides_0"), val = tensor([1, 1])]; + tensor query_121_pad_0 = const()[name = string("query_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_121_dilations_0 = const()[name = string("query_121_dilations_0"), val = tensor([1, 1])]; + int32 query_121_groups_0 = const()[name = string("query_121_groups_0"), val = int32(1)]; + tensor layers_20_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314835328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316932544))))[name = string("layers_20_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_121_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_121_dilations_0, groups = query_121_groups_0, pad = query_121_pad_0, pad_type = query_121_pad_type_0, strides = query_121_strides_0, weight = layers_20_self_attn_q_proj_weight_to_fp16_palettized, x = obj_165_cast_fp16)[name = string("query_121_cast_fp16")]; + string current_key_81_pad_type_0 = const()[name = string("current_key_81_pad_type_0"), val = string("valid")]; + tensor current_key_81_strides_0 = const()[name = string("current_key_81_strides_0"), val = tensor([1, 1])]; + tensor current_key_81_pad_0 = const()[name = string("current_key_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_81_dilations_0 = const()[name = string("current_key_81_dilations_0"), val = tensor([1, 1])]; + int32 current_key_81_groups_0 = const()[name = string("current_key_81_groups_0"), val = int32(1)]; + tensor layers_20_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316933120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317981760))))[name = string("layers_20_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_81_cast_fp16 = conv(dilations = current_key_81_dilations_0, groups = current_key_81_groups_0, pad = current_key_81_pad_0, pad_type = current_key_81_pad_type_0, strides = current_key_81_strides_0, weight = layers_20_self_attn_k_proj_weight_to_fp16_palettized, x = obj_165_cast_fp16)[name = string("current_key_81_cast_fp16")]; + string current_value_41_pad_type_0 = const()[name = string("current_value_41_pad_type_0"), val = string("valid")]; + tensor current_value_41_strides_0 = const()[name = string("current_value_41_strides_0"), val = tensor([1, 1])]; + tensor current_value_41_pad_0 = const()[name = string("current_value_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_41_dilations_0 = const()[name = string("current_value_41_dilations_0"), val = tensor([1, 1])]; + int32 current_value_41_groups_0 = const()[name = string("current_value_41_groups_0"), val = int32(1)]; + tensor layers_20_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317982336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319030976))))[name = string("layers_20_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_41_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_41_dilations_0, groups = current_value_41_groups_0, pad = current_value_41_pad_0, pad_type = current_value_41_pad_type_0, strides = current_value_41_strides_0, weight = layers_20_self_attn_v_proj_weight_to_fp16_palettized, x = obj_165_cast_fp16)[name = string("current_value_41_cast_fp16")]; + tensor var_7971 = const()[name = string("op_7971"), val = tensor([16, 128, 1, 1])]; + tensor inputs_161_cast_fp16 = reshape(shape = var_7971, x = query_121_cast_fp16)[name = string("inputs_161_cast_fp16")]; + tensor inputs_sq_163_cast_fp16 = mul(x = inputs_161_cast_fp16, y = inputs_161_cast_fp16)[name = string("inputs_sq_163_cast_fp16")]; + tensor variance_163_axes_0 = const()[name = string("variance_163_axes_0"), val = tensor([1])]; + bool variance_163_keep_dims_0 = const()[name = string("variance_163_keep_dims_0"), val = bool(true)]; + tensor variance_163_cast_fp16 = reduce_mean(axes = variance_163_axes_0, keep_dims = variance_163_keep_dims_0, x = inputs_sq_163_cast_fp16)[name = string("variance_163_cast_fp16")]; + fp16 var_7977_to_fp16 = const()[name = string("op_7977_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7978_cast_fp16 = add(x = variance_163_cast_fp16, y = var_7977_to_fp16)[name = string("op_7978_cast_fp16")]; + fp32 var_7979_epsilon_0 = const()[name = string("op_7979_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7979_cast_fp16 = rsqrt(epsilon = var_7979_epsilon_0, x = var_7978_cast_fp16)[name = string("op_7979_cast_fp16")]; + tensor hidden_states_203_cast_fp16 = mul(x = inputs_161_cast_fp16, y = var_7979_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; + tensor w_163_to_fp16 = const()[name = string("w_163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031552)))]; + tensor query_normed_41_cast_fp16 = mul(x = w_163_to_fp16, y = hidden_states_203_cast_fp16)[name = string("query_normed_41_cast_fp16")]; + tensor var_7987 = const()[name = string("op_7987"), val = tensor([8, 128, 1, 1])]; + tensor inputs_163_cast_fp16 = reshape(shape = var_7987, x = current_key_81_cast_fp16)[name = string("inputs_163_cast_fp16")]; + tensor inputs_sq_165_cast_fp16 = mul(x = inputs_163_cast_fp16, y = inputs_163_cast_fp16)[name = string("inputs_sq_165_cast_fp16")]; + tensor variance_165_axes_0 = const()[name = string("variance_165_axes_0"), val = tensor([1])]; + bool variance_165_keep_dims_0 = const()[name = string("variance_165_keep_dims_0"), val = bool(true)]; + tensor variance_165_cast_fp16 = reduce_mean(axes = variance_165_axes_0, keep_dims = variance_165_keep_dims_0, x = inputs_sq_165_cast_fp16)[name = string("variance_165_cast_fp16")]; + fp16 var_7993_to_fp16 = const()[name = string("op_7993_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_7994_cast_fp16 = add(x = variance_165_cast_fp16, y = var_7993_to_fp16)[name = string("op_7994_cast_fp16")]; + fp32 var_7995_epsilon_0 = const()[name = string("op_7995_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7995_cast_fp16 = rsqrt(epsilon = var_7995_epsilon_0, x = var_7994_cast_fp16)[name = string("op_7995_cast_fp16")]; + tensor hidden_states_205_cast_fp16 = mul(x = inputs_163_cast_fp16, y = var_7995_cast_fp16)[name = string("hidden_states_205_cast_fp16")]; + tensor w_165_to_fp16 = const()[name = string("w_165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031872)))]; + tensor current_key_normed_41_cast_fp16 = mul(x = w_165_to_fp16, y = hidden_states_205_cast_fp16)[name = string("current_key_normed_41_cast_fp16")]; + tensor var_8013 = const()[name = string("op_8013"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_121_cast_fp16 = reshape(shape = var_8013, x = query_normed_41_cast_fp16)[name = string("mh_q_121_cast_fp16")]; + tensor var_8015 = const()[name = string("op_8015"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_81_cast_fp16 = reshape(shape = var_8015, x = current_key_normed_41_cast_fp16)[name = string("mh_k_81_cast_fp16")]; + tensor var_8019_cast_fp16 = mul(x = mh_q_121_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8019_cast_fp16")]; + tensor var_8024_begin_0 = const()[name = string("op_8024_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8024_end_0 = const()[name = string("op_8024_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_8024_end_mask_0 = const()[name = string("op_8024_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_8024_cast_fp16 = slice_by_index(begin = var_8024_begin_0, end = var_8024_end_0, end_mask = var_8024_end_mask_0, x = mh_q_121_cast_fp16)[name = string("op_8024_cast_fp16")]; + tensor var_8030_begin_0 = const()[name = string("op_8030_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8030_end_0 = const()[name = string("op_8030_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_8030_end_mask_0 = const()[name = string("op_8030_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8030_cast_fp16 = slice_by_index(begin = var_8030_begin_0, end = var_8030_end_0, end_mask = var_8030_end_mask_0, x = mh_q_121_cast_fp16)[name = string("op_8030_cast_fp16")]; + fp16 const_477_promoted_to_fp16 = const()[name = string("const_477_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8032_cast_fp16 = mul(x = var_8030_cast_fp16, y = const_477_promoted_to_fp16)[name = string("op_8032_cast_fp16")]; + bool var_8034_interleave_0 = const()[name = string("op_8034_interleave_0"), val = bool(false)]; + tensor var_8034_cast_fp16 = concat(axis = var_7912, interleave = var_8034_interleave_0, values = (var_8032_cast_fp16, var_8024_cast_fp16))[name = string("op_8034_cast_fp16")]; + tensor var_8035_cast_fp16 = mul(x = var_8034_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8035_cast_fp16")]; + tensor mh_q_123_cast_fp16 = add(x = var_8019_cast_fp16, y = var_8035_cast_fp16)[name = string("mh_q_123_cast_fp16")]; + tensor var_8037_cast_fp16 = mul(x = mh_k_81_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8037_cast_fp16")]; + tensor var_8042_begin_0 = const()[name = string("op_8042_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8042_end_0 = const()[name = string("op_8042_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_8042_end_mask_0 = const()[name = string("op_8042_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_8042_cast_fp16 = slice_by_index(begin = var_8042_begin_0, end = var_8042_end_0, end_mask = var_8042_end_mask_0, x = mh_k_81_cast_fp16)[name = string("op_8042_cast_fp16")]; + tensor var_8048_begin_0 = const()[name = string("op_8048_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8048_end_0 = const()[name = string("op_8048_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_8048_end_mask_0 = const()[name = string("op_8048_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8048_cast_fp16 = slice_by_index(begin = var_8048_begin_0, end = var_8048_end_0, end_mask = var_8048_end_mask_0, x = mh_k_81_cast_fp16)[name = string("op_8048_cast_fp16")]; + fp16 const_480_promoted_to_fp16 = const()[name = string("const_480_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8050_cast_fp16 = mul(x = var_8048_cast_fp16, y = const_480_promoted_to_fp16)[name = string("op_8050_cast_fp16")]; + bool var_8052_interleave_0 = const()[name = string("op_8052_interleave_0"), val = bool(false)]; + tensor var_8052_cast_fp16 = concat(axis = var_7912, interleave = var_8052_interleave_0, values = (var_8050_cast_fp16, var_8042_cast_fp16))[name = string("op_8052_cast_fp16")]; + tensor var_8053_cast_fp16 = mul(x = var_8052_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8053_cast_fp16")]; + tensor mh_k_83_cast_fp16 = add(x = var_8037_cast_fp16, y = var_8053_cast_fp16)[name = string("mh_k_83_cast_fp16")]; + tensor var_8057 = const()[name = string("op_8057"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_83_cast_fp16 = reshape(shape = var_8057, x = mh_k_83_cast_fp16)[name = string("current_key_83_cast_fp16")]; + tensor var_8064_cast_fp16 = mul(x = var_101_cast_fp16_20, y = var_323_cast_fp16)[name = string("op_8064_cast_fp16")]; + tensor var_8065_cast_fp16 = mul(x = current_key_83_cast_fp16, y = var_321_cast_fp16)[name = string("op_8065_cast_fp16")]; + tensor key_123_cast_fp16 = add(x = var_8064_cast_fp16, y = var_8065_cast_fp16)[name = string("key_123_cast_fp16")]; + tensor var_8068_cast_fp16 = mul(x = var_132_cast_fp16_20, y = var_323_cast_fp16)[name = string("op_8068_cast_fp16")]; + tensor var_8069_cast_fp16 = mul(x = current_value_41_cast_fp16, y = var_321_cast_fp16)[name = string("op_8069_cast_fp16")]; + tensor value_81_cast_fp16 = add(x = var_8068_cast_fp16, y = var_8069_cast_fp16)[name = string("value_81_cast_fp16")]; + tensor var_8073 = const()[name = string("op_8073"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_81_cast_fp16 = reshape(shape = var_8073, x = key_123_cast_fp16)[name = string("key_heads_81_cast_fp16")]; + tensor var_8075 = const()[name = string("op_8075"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_81_cast_fp16 = reshape(shape = var_8075, x = value_81_cast_fp16)[name = string("value_heads_81_cast_fp16")]; + tensor var_8078_begin_0 = const()[name = string("op_8078_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8078_end_0 = const()[name = string("op_8078_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8078_end_mask_0 = const()[name = string("op_8078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8078_cast_fp16 = slice_by_index(begin = var_8078_begin_0, end = var_8078_end_0, end_mask = var_8078_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8078_cast_fp16")]; + tensor var_8082_begin_0 = const()[name = string("op_8082_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8082_end_0 = const()[name = string("op_8082_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8082_end_mask_0 = const()[name = string("op_8082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8082_cast_fp16 = slice_by_index(begin = var_8082_begin_0, end = var_8082_end_0, end_mask = var_8082_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8082_cast_fp16")]; + tensor var_8094_begin_0 = const()[name = string("op_8094_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_8094_end_0 = const()[name = string("op_8094_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_8094_end_mask_0 = const()[name = string("op_8094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8094_cast_fp16 = slice_by_index(begin = var_8094_begin_0, end = var_8094_end_0, end_mask = var_8094_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8094_cast_fp16")]; + tensor var_8098_begin_0 = const()[name = string("op_8098_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_8098_end_0 = const()[name = string("op_8098_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_8098_end_mask_0 = const()[name = string("op_8098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8098_cast_fp16 = slice_by_index(begin = var_8098_begin_0, end = var_8098_end_0, end_mask = var_8098_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8098_cast_fp16")]; + tensor var_8110_begin_0 = const()[name = string("op_8110_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_8110_end_0 = const()[name = string("op_8110_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_8110_end_mask_0 = const()[name = string("op_8110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8110_cast_fp16 = slice_by_index(begin = var_8110_begin_0, end = var_8110_end_0, end_mask = var_8110_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8110_cast_fp16")]; + tensor var_8114_begin_0 = const()[name = string("op_8114_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_8114_end_0 = const()[name = string("op_8114_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_8114_end_mask_0 = const()[name = string("op_8114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8114_cast_fp16 = slice_by_index(begin = var_8114_begin_0, end = var_8114_end_0, end_mask = var_8114_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8114_cast_fp16")]; + tensor var_8126_begin_0 = const()[name = string("op_8126_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_8126_end_0 = const()[name = string("op_8126_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_8126_end_mask_0 = const()[name = string("op_8126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8126_cast_fp16 = slice_by_index(begin = var_8126_begin_0, end = var_8126_end_0, end_mask = var_8126_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8126_cast_fp16")]; + tensor var_8130_begin_0 = const()[name = string("op_8130_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_8130_end_0 = const()[name = string("op_8130_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_8130_end_mask_0 = const()[name = string("op_8130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8130_cast_fp16 = slice_by_index(begin = var_8130_begin_0, end = var_8130_end_0, end_mask = var_8130_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8130_cast_fp16")]; + tensor var_8142_begin_0 = const()[name = string("op_8142_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_8142_end_0 = const()[name = string("op_8142_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_8142_end_mask_0 = const()[name = string("op_8142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8142_cast_fp16 = slice_by_index(begin = var_8142_begin_0, end = var_8142_end_0, end_mask = var_8142_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8142_cast_fp16")]; + tensor var_8146_begin_0 = const()[name = string("op_8146_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_8146_end_0 = const()[name = string("op_8146_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_8146_end_mask_0 = const()[name = string("op_8146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8146_cast_fp16 = slice_by_index(begin = var_8146_begin_0, end = var_8146_end_0, end_mask = var_8146_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8146_cast_fp16")]; + tensor var_8158_begin_0 = const()[name = string("op_8158_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_8158_end_0 = const()[name = string("op_8158_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_8158_end_mask_0 = const()[name = string("op_8158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8158_cast_fp16 = slice_by_index(begin = var_8158_begin_0, end = var_8158_end_0, end_mask = var_8158_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8158_cast_fp16")]; + tensor var_8162_begin_0 = const()[name = string("op_8162_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_8162_end_0 = const()[name = string("op_8162_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_8162_end_mask_0 = const()[name = string("op_8162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8162_cast_fp16 = slice_by_index(begin = var_8162_begin_0, end = var_8162_end_0, end_mask = var_8162_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8162_cast_fp16")]; + tensor var_8174_begin_0 = const()[name = string("op_8174_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_8174_end_0 = const()[name = string("op_8174_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_8174_end_mask_0 = const()[name = string("op_8174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8174_cast_fp16 = slice_by_index(begin = var_8174_begin_0, end = var_8174_end_0, end_mask = var_8174_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8174_cast_fp16")]; + tensor var_8178_begin_0 = const()[name = string("op_8178_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_8178_end_0 = const()[name = string("op_8178_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_8178_end_mask_0 = const()[name = string("op_8178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8178_cast_fp16 = slice_by_index(begin = var_8178_begin_0, end = var_8178_end_0, end_mask = var_8178_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8178_cast_fp16")]; + tensor var_8190_begin_0 = const()[name = string("op_8190_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_8190_end_0 = const()[name = string("op_8190_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8190_end_mask_0 = const()[name = string("op_8190_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8190_cast_fp16 = slice_by_index(begin = var_8190_begin_0, end = var_8190_end_0, end_mask = var_8190_end_mask_0, x = key_heads_81_cast_fp16)[name = string("op_8190_cast_fp16")]; + tensor var_8194_begin_0 = const()[name = string("op_8194_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_8194_end_0 = const()[name = string("op_8194_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8194_end_mask_0 = const()[name = string("op_8194_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8194_cast_fp16 = slice_by_index(begin = var_8194_begin_0, end = var_8194_end_0, end_mask = var_8194_end_mask_0, x = value_heads_81_cast_fp16)[name = string("op_8194_cast_fp16")]; + bool key_heads_83_interleave_0 = const()[name = string("key_heads_83_interleave_0"), val = bool(false)]; + tensor key_heads_83_cast_fp16 = concat(axis = var_7920, interleave = key_heads_83_interleave_0, values = (var_8078_cast_fp16, var_8078_cast_fp16, var_8094_cast_fp16, var_8094_cast_fp16, var_8110_cast_fp16, var_8110_cast_fp16, var_8126_cast_fp16, var_8126_cast_fp16, var_8142_cast_fp16, var_8142_cast_fp16, var_8158_cast_fp16, var_8158_cast_fp16, var_8174_cast_fp16, var_8174_cast_fp16, var_8190_cast_fp16, var_8190_cast_fp16))[name = string("key_heads_83_cast_fp16")]; + bool value_heads_83_interleave_0 = const()[name = string("value_heads_83_interleave_0"), val = bool(false)]; + tensor value_heads_83_cast_fp16 = concat(axis = var_7920, interleave = value_heads_83_interleave_0, values = (var_8082_cast_fp16, var_8082_cast_fp16, var_8098_cast_fp16, var_8098_cast_fp16, var_8114_cast_fp16, var_8114_cast_fp16, var_8130_cast_fp16, var_8130_cast_fp16, var_8146_cast_fp16, var_8146_cast_fp16, var_8162_cast_fp16, var_8162_cast_fp16, var_8178_cast_fp16, var_8178_cast_fp16, var_8194_cast_fp16, var_8194_cast_fp16))[name = string("value_heads_83_cast_fp16")]; + fp16 var_8217_to_fp16 = const()[name = string("op_8217_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_8218_cast_fp16 = mul(x = mh_q_123_cast_fp16, y = var_8217_to_fp16)[name = string("op_8218_cast_fp16")]; + bool mh_w_81_transpose_x_0 = const()[name = string("mh_w_81_transpose_x_0"), val = bool(true)]; + bool mh_w_81_transpose_y_0 = const()[name = string("mh_w_81_transpose_y_0"), val = bool(false)]; + tensor mh_w_81_cast_fp16 = matmul(transpose_x = mh_w_81_transpose_x_0, transpose_y = mh_w_81_transpose_y_0, x = var_8218_cast_fp16, y = key_heads_83_cast_fp16)[name = string("mh_w_81_cast_fp16")]; + tensor mh_w_83_cast_fp16 = add(x = mh_w_81_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_83_cast_fp16")]; + tensor var_8230_cast_fp16 = softmax(axis = var_7902, x = mh_w_83_cast_fp16)[name = string("op_8230_cast_fp16")]; + bool attn_41_transpose_x_0 = const()[name = string("attn_41_transpose_x_0"), val = bool(false)]; + bool attn_41_transpose_y_0 = const()[name = string("attn_41_transpose_y_0"), val = bool(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = value_heads_83_cast_fp16, y = var_8230_cast_fp16)[name = string("attn_41_cast_fp16")]; + tensor var_8235 = const()[name = string("op_8235"), val = tensor([1, -1, 1, 1])]; + tensor input_161_cast_fp16 = reshape(shape = var_8235, x = attn_41_cast_fp16)[name = string("input_161_cast_fp16")]; + string obj_171_pad_type_0 = const()[name = string("obj_171_pad_type_0"), val = string("valid")]; + tensor obj_171_strides_0 = const()[name = string("obj_171_strides_0"), val = tensor([1, 1])]; + tensor obj_171_pad_0 = const()[name = string("obj_171_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_171_dilations_0 = const()[name = string("obj_171_dilations_0"), val = tensor([1, 1])]; + int32 obj_171_groups_0 = const()[name = string("obj_171_groups_0"), val = int32(1)]; + tensor layers_20_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319032192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321129408))))[name = string("layers_20_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_171_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_171_dilations_0, groups = obj_171_groups_0, pad = obj_171_pad_0, pad_type = obj_171_pad_type_0, strides = obj_171_strides_0, weight = layers_20_self_attn_o_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("obj_171_cast_fp16")]; + tensor inputs_165_cast_fp16 = add(x = inputs_159_cast_fp16, y = obj_171_cast_fp16)[name = string("inputs_165_cast_fp16")]; + tensor inputs_sq_167_cast_fp16 = mul(x = inputs_165_cast_fp16, y = inputs_165_cast_fp16)[name = string("inputs_sq_167_cast_fp16")]; + tensor variance_167_axes_0 = const()[name = string("variance_167_axes_0"), val = tensor([1])]; + bool variance_167_keep_dims_0 = const()[name = string("variance_167_keep_dims_0"), val = bool(true)]; + tensor variance_167_cast_fp16 = reduce_mean(axes = variance_167_axes_0, keep_dims = variance_167_keep_dims_0, x = inputs_sq_167_cast_fp16)[name = string("variance_167_cast_fp16")]; + fp16 var_8253_to_fp16 = const()[name = string("op_8253_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8254_cast_fp16 = add(x = variance_167_cast_fp16, y = var_8253_to_fp16)[name = string("op_8254_cast_fp16")]; + fp32 var_8255_epsilon_0 = const()[name = string("op_8255_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8255_cast_fp16 = rsqrt(epsilon = var_8255_epsilon_0, x = var_8254_cast_fp16)[name = string("op_8255_cast_fp16")]; + tensor hidden_states_207_cast_fp16 = mul(x = inputs_165_cast_fp16, y = var_8255_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; + tensor w_167_to_fp16 = const()[name = string("w_167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321129984)))]; + tensor input_163_cast_fp16 = mul(x = w_167_to_fp16, y = hidden_states_207_cast_fp16)[name = string("input_163_cast_fp16")]; + string input_165_pad_type_0 = const()[name = string("input_165_pad_type_0"), val = string("valid")]; + tensor input_165_strides_0 = const()[name = string("input_165_strides_0"), val = tensor([1, 1])]; + tensor input_165_pad_0 = const()[name = string("input_165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_165_dilations_0 = const()[name = string("input_165_dilations_0"), val = tensor([1, 1])]; + int32 input_165_groups_0 = const()[name = string("input_165_groups_0"), val = int32(1)]; + tensor layers_20_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321132096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324277888))))[name = string("layers_20_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_165_cast_fp16 = conv(dilations = input_165_dilations_0, groups = input_165_groups_0, pad = input_165_pad_0, pad_type = input_165_pad_type_0, strides = input_165_strides_0, weight = layers_20_mlp_gate_proj_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = string("input_165_cast_fp16")]; + tensor var_8269_cast_fp16 = silu(x = input_165_cast_fp16)[name = string("op_8269_cast_fp16")]; + string var_8275_pad_type_0 = const()[name = string("op_8275_pad_type_0"), val = string("valid")]; + tensor var_8275_strides_0 = const()[name = string("op_8275_strides_0"), val = tensor([1, 1])]; + tensor var_8275_pad_0 = const()[name = string("op_8275_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8275_dilations_0 = const()[name = string("op_8275_dilations_0"), val = tensor([1, 1])]; + int32 var_8275_groups_0 = const()[name = string("op_8275_groups_0"), val = int32(1)]; + tensor layers_20_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324278464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327424256))))[name = string("layers_20_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_8275_cast_fp16 = conv(dilations = var_8275_dilations_0, groups = var_8275_groups_0, pad = var_8275_pad_0, pad_type = var_8275_pad_type_0, strides = var_8275_strides_0, weight = layers_20_mlp_up_proj_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = string("op_8275_cast_fp16")]; + tensor input_167_cast_fp16 = mul(x = var_8269_cast_fp16, y = var_8275_cast_fp16)[name = string("input_167_cast_fp16")]; + string hidden_states_209_pad_type_0 = const()[name = string("hidden_states_209_pad_type_0"), val = string("valid")]; + tensor hidden_states_209_strides_0 = const()[name = string("hidden_states_209_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_209_pad_0 = const()[name = string("hidden_states_209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_209_dilations_0 = const()[name = string("hidden_states_209_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_209_groups_0 = const()[name = string("hidden_states_209_groups_0"), val = int32(1)]; + tensor layers_20_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327424832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330570624))))[name = string("layers_20_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_209_cast_fp16 = conv(dilations = hidden_states_209_dilations_0, groups = hidden_states_209_groups_0, pad = hidden_states_209_pad_0, pad_type = hidden_states_209_pad_type_0, strides = hidden_states_209_strides_0, weight = layers_20_mlp_down_proj_weight_to_fp16_palettized, x = input_167_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + tensor inputs_167_cast_fp16 = add(x = inputs_165_cast_fp16, y = hidden_states_209_cast_fp16)[name = string("inputs_167_cast_fp16")]; + int32 var_8289 = const()[name = string("op_8289"), val = int32(3)]; + int32 var_8299 = const()[name = string("op_8299"), val = int32(-2)]; + int32 var_8307 = const()[name = string("op_8307"), val = int32(1)]; + tensor inputs_sq_169_cast_fp16 = mul(x = inputs_167_cast_fp16, y = inputs_167_cast_fp16)[name = string("inputs_sq_169_cast_fp16")]; + tensor variance_169_axes_0 = const()[name = string("variance_169_axes_0"), val = tensor([1])]; + bool variance_169_keep_dims_0 = const()[name = string("variance_169_keep_dims_0"), val = bool(true)]; + tensor variance_169_cast_fp16 = reduce_mean(axes = variance_169_axes_0, keep_dims = variance_169_keep_dims_0, x = inputs_sq_169_cast_fp16)[name = string("variance_169_cast_fp16")]; + fp16 var_8319_to_fp16 = const()[name = string("op_8319_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8320_cast_fp16 = add(x = variance_169_cast_fp16, y = var_8319_to_fp16)[name = string("op_8320_cast_fp16")]; + fp32 var_8321_epsilon_0 = const()[name = string("op_8321_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8321_cast_fp16 = rsqrt(epsilon = var_8321_epsilon_0, x = var_8320_cast_fp16)[name = string("op_8321_cast_fp16")]; + tensor hidden_states_211_cast_fp16 = mul(x = inputs_167_cast_fp16, y = var_8321_cast_fp16)[name = string("hidden_states_211_cast_fp16")]; + tensor w_169_to_fp16 = const()[name = string("w_169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330571200)))]; + tensor obj_173_cast_fp16 = mul(x = w_169_to_fp16, y = hidden_states_211_cast_fp16)[name = string("obj_173_cast_fp16")]; + string query_127_pad_type_0 = const()[name = string("query_127_pad_type_0"), val = string("valid")]; + tensor query_127_strides_0 = const()[name = string("query_127_strides_0"), val = tensor([1, 1])]; + tensor query_127_pad_0 = const()[name = string("query_127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_127_dilations_0 = const()[name = string("query_127_dilations_0"), val = tensor([1, 1])]; + int32 query_127_groups_0 = const()[name = string("query_127_groups_0"), val = int32(1)]; + tensor layers_21_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330573312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332670528))))[name = string("layers_21_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_127_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_127_dilations_0, groups = query_127_groups_0, pad = query_127_pad_0, pad_type = query_127_pad_type_0, strides = query_127_strides_0, weight = layers_21_self_attn_q_proj_weight_to_fp16_palettized, x = obj_173_cast_fp16)[name = string("query_127_cast_fp16")]; + string current_key_85_pad_type_0 = const()[name = string("current_key_85_pad_type_0"), val = string("valid")]; + tensor current_key_85_strides_0 = const()[name = string("current_key_85_strides_0"), val = tensor([1, 1])]; + tensor current_key_85_pad_0 = const()[name = string("current_key_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_85_dilations_0 = const()[name = string("current_key_85_dilations_0"), val = tensor([1, 1])]; + int32 current_key_85_groups_0 = const()[name = string("current_key_85_groups_0"), val = int32(1)]; + tensor layers_21_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332671104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333719744))))[name = string("layers_21_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_85_cast_fp16 = conv(dilations = current_key_85_dilations_0, groups = current_key_85_groups_0, pad = current_key_85_pad_0, pad_type = current_key_85_pad_type_0, strides = current_key_85_strides_0, weight = layers_21_self_attn_k_proj_weight_to_fp16_palettized, x = obj_173_cast_fp16)[name = string("current_key_85_cast_fp16")]; + string current_value_43_pad_type_0 = const()[name = string("current_value_43_pad_type_0"), val = string("valid")]; + tensor current_value_43_strides_0 = const()[name = string("current_value_43_strides_0"), val = tensor([1, 1])]; + tensor current_value_43_pad_0 = const()[name = string("current_value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_43_dilations_0 = const()[name = string("current_value_43_dilations_0"), val = tensor([1, 1])]; + int32 current_value_43_groups_0 = const()[name = string("current_value_43_groups_0"), val = int32(1)]; + tensor layers_21_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333720320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334768960))))[name = string("layers_21_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_43_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_43_dilations_0, groups = current_value_43_groups_0, pad = current_value_43_pad_0, pad_type = current_value_43_pad_type_0, strides = current_value_43_strides_0, weight = layers_21_self_attn_v_proj_weight_to_fp16_palettized, x = obj_173_cast_fp16)[name = string("current_value_43_cast_fp16")]; + tensor var_8358 = const()[name = string("op_8358"), val = tensor([16, 128, 1, 1])]; + tensor inputs_169_cast_fp16 = reshape(shape = var_8358, x = query_127_cast_fp16)[name = string("inputs_169_cast_fp16")]; + tensor inputs_sq_171_cast_fp16 = mul(x = inputs_169_cast_fp16, y = inputs_169_cast_fp16)[name = string("inputs_sq_171_cast_fp16")]; + tensor variance_171_axes_0 = const()[name = string("variance_171_axes_0"), val = tensor([1])]; + bool variance_171_keep_dims_0 = const()[name = string("variance_171_keep_dims_0"), val = bool(true)]; + tensor variance_171_cast_fp16 = reduce_mean(axes = variance_171_axes_0, keep_dims = variance_171_keep_dims_0, x = inputs_sq_171_cast_fp16)[name = string("variance_171_cast_fp16")]; + fp16 var_8364_to_fp16 = const()[name = string("op_8364_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8365_cast_fp16 = add(x = variance_171_cast_fp16, y = var_8364_to_fp16)[name = string("op_8365_cast_fp16")]; + fp32 var_8366_epsilon_0 = const()[name = string("op_8366_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8366_cast_fp16 = rsqrt(epsilon = var_8366_epsilon_0, x = var_8365_cast_fp16)[name = string("op_8366_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = mul(x = inputs_169_cast_fp16, y = var_8366_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + tensor w_171_to_fp16 = const()[name = string("w_171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334769536)))]; + tensor query_normed_43_cast_fp16 = mul(x = w_171_to_fp16, y = hidden_states_213_cast_fp16)[name = string("query_normed_43_cast_fp16")]; + tensor var_8374 = const()[name = string("op_8374"), val = tensor([8, 128, 1, 1])]; + tensor inputs_171_cast_fp16 = reshape(shape = var_8374, x = current_key_85_cast_fp16)[name = string("inputs_171_cast_fp16")]; + tensor inputs_sq_173_cast_fp16 = mul(x = inputs_171_cast_fp16, y = inputs_171_cast_fp16)[name = string("inputs_sq_173_cast_fp16")]; + tensor variance_173_axes_0 = const()[name = string("variance_173_axes_0"), val = tensor([1])]; + bool variance_173_keep_dims_0 = const()[name = string("variance_173_keep_dims_0"), val = bool(true)]; + tensor variance_173_cast_fp16 = reduce_mean(axes = variance_173_axes_0, keep_dims = variance_173_keep_dims_0, x = inputs_sq_173_cast_fp16)[name = string("variance_173_cast_fp16")]; + fp16 var_8380_to_fp16 = const()[name = string("op_8380_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8381_cast_fp16 = add(x = variance_173_cast_fp16, y = var_8380_to_fp16)[name = string("op_8381_cast_fp16")]; + fp32 var_8382_epsilon_0 = const()[name = string("op_8382_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8382_cast_fp16 = rsqrt(epsilon = var_8382_epsilon_0, x = var_8381_cast_fp16)[name = string("op_8382_cast_fp16")]; + tensor hidden_states_215_cast_fp16 = mul(x = inputs_171_cast_fp16, y = var_8382_cast_fp16)[name = string("hidden_states_215_cast_fp16")]; + tensor w_173_to_fp16 = const()[name = string("w_173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334769856)))]; + tensor current_key_normed_43_cast_fp16 = mul(x = w_173_to_fp16, y = hidden_states_215_cast_fp16)[name = string("current_key_normed_43_cast_fp16")]; + tensor var_8400 = const()[name = string("op_8400"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_127_cast_fp16 = reshape(shape = var_8400, x = query_normed_43_cast_fp16)[name = string("mh_q_127_cast_fp16")]; + tensor var_8402 = const()[name = string("op_8402"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_85_cast_fp16 = reshape(shape = var_8402, x = current_key_normed_43_cast_fp16)[name = string("mh_k_85_cast_fp16")]; + tensor var_8406_cast_fp16 = mul(x = mh_q_127_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8406_cast_fp16")]; + tensor var_8411_begin_0 = const()[name = string("op_8411_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8411_end_0 = const()[name = string("op_8411_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_8411_end_mask_0 = const()[name = string("op_8411_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_8411_cast_fp16 = slice_by_index(begin = var_8411_begin_0, end = var_8411_end_0, end_mask = var_8411_end_mask_0, x = mh_q_127_cast_fp16)[name = string("op_8411_cast_fp16")]; + tensor var_8417_begin_0 = const()[name = string("op_8417_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8417_end_0 = const()[name = string("op_8417_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_8417_end_mask_0 = const()[name = string("op_8417_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8417_cast_fp16 = slice_by_index(begin = var_8417_begin_0, end = var_8417_end_0, end_mask = var_8417_end_mask_0, x = mh_q_127_cast_fp16)[name = string("op_8417_cast_fp16")]; + fp16 const_500_promoted_to_fp16 = const()[name = string("const_500_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8419_cast_fp16 = mul(x = var_8417_cast_fp16, y = const_500_promoted_to_fp16)[name = string("op_8419_cast_fp16")]; + bool var_8421_interleave_0 = const()[name = string("op_8421_interleave_0"), val = bool(false)]; + tensor var_8421_cast_fp16 = concat(axis = var_8299, interleave = var_8421_interleave_0, values = (var_8419_cast_fp16, var_8411_cast_fp16))[name = string("op_8421_cast_fp16")]; + tensor var_8422_cast_fp16 = mul(x = var_8421_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8422_cast_fp16")]; + tensor mh_q_129_cast_fp16 = add(x = var_8406_cast_fp16, y = var_8422_cast_fp16)[name = string("mh_q_129_cast_fp16")]; + tensor var_8424_cast_fp16 = mul(x = mh_k_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8424_cast_fp16")]; + tensor var_8429_begin_0 = const()[name = string("op_8429_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8429_end_0 = const()[name = string("op_8429_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_8429_end_mask_0 = const()[name = string("op_8429_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_8429_cast_fp16 = slice_by_index(begin = var_8429_begin_0, end = var_8429_end_0, end_mask = var_8429_end_mask_0, x = mh_k_85_cast_fp16)[name = string("op_8429_cast_fp16")]; + tensor var_8435_begin_0 = const()[name = string("op_8435_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8435_end_0 = const()[name = string("op_8435_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_8435_end_mask_0 = const()[name = string("op_8435_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8435_cast_fp16 = slice_by_index(begin = var_8435_begin_0, end = var_8435_end_0, end_mask = var_8435_end_mask_0, x = mh_k_85_cast_fp16)[name = string("op_8435_cast_fp16")]; + fp16 const_503_promoted_to_fp16 = const()[name = string("const_503_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8437_cast_fp16 = mul(x = var_8435_cast_fp16, y = const_503_promoted_to_fp16)[name = string("op_8437_cast_fp16")]; + bool var_8439_interleave_0 = const()[name = string("op_8439_interleave_0"), val = bool(false)]; + tensor var_8439_cast_fp16 = concat(axis = var_8299, interleave = var_8439_interleave_0, values = (var_8437_cast_fp16, var_8429_cast_fp16))[name = string("op_8439_cast_fp16")]; + tensor var_8440_cast_fp16 = mul(x = var_8439_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8440_cast_fp16")]; + tensor mh_k_87_cast_fp16 = add(x = var_8424_cast_fp16, y = var_8440_cast_fp16)[name = string("mh_k_87_cast_fp16")]; + tensor var_8444 = const()[name = string("op_8444"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_87_cast_fp16 = reshape(shape = var_8444, x = mh_k_87_cast_fp16)[name = string("current_key_87_cast_fp16")]; + tensor var_8451_cast_fp16 = mul(x = var_101_cast_fp16_21, y = var_323_cast_fp16)[name = string("op_8451_cast_fp16")]; + tensor var_8452_cast_fp16 = mul(x = current_key_87_cast_fp16, y = var_321_cast_fp16)[name = string("op_8452_cast_fp16")]; + tensor key_129_cast_fp16 = add(x = var_8451_cast_fp16, y = var_8452_cast_fp16)[name = string("key_129_cast_fp16")]; + tensor var_8455_cast_fp16 = mul(x = var_132_cast_fp16_21, y = var_323_cast_fp16)[name = string("op_8455_cast_fp16")]; + tensor var_8456_cast_fp16 = mul(x = current_value_43_cast_fp16, y = var_321_cast_fp16)[name = string("op_8456_cast_fp16")]; + tensor value_85_cast_fp16 = add(x = var_8455_cast_fp16, y = var_8456_cast_fp16)[name = string("value_85_cast_fp16")]; + tensor var_8460 = const()[name = string("op_8460"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_85_cast_fp16 = reshape(shape = var_8460, x = key_129_cast_fp16)[name = string("key_heads_85_cast_fp16")]; + tensor var_8462 = const()[name = string("op_8462"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_85_cast_fp16 = reshape(shape = var_8462, x = value_85_cast_fp16)[name = string("value_heads_85_cast_fp16")]; + tensor var_8465_begin_0 = const()[name = string("op_8465_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8465_end_0 = const()[name = string("op_8465_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8465_end_mask_0 = const()[name = string("op_8465_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8465_cast_fp16 = slice_by_index(begin = var_8465_begin_0, end = var_8465_end_0, end_mask = var_8465_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8465_cast_fp16")]; + tensor var_8469_begin_0 = const()[name = string("op_8469_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8469_end_0 = const()[name = string("op_8469_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8469_end_mask_0 = const()[name = string("op_8469_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8469_cast_fp16 = slice_by_index(begin = var_8469_begin_0, end = var_8469_end_0, end_mask = var_8469_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8469_cast_fp16")]; + tensor var_8481_begin_0 = const()[name = string("op_8481_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_8481_end_0 = const()[name = string("op_8481_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_8481_end_mask_0 = const()[name = string("op_8481_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8481_cast_fp16 = slice_by_index(begin = var_8481_begin_0, end = var_8481_end_0, end_mask = var_8481_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8481_cast_fp16")]; + tensor var_8485_begin_0 = const()[name = string("op_8485_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_8485_end_0 = const()[name = string("op_8485_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_8485_end_mask_0 = const()[name = string("op_8485_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8485_cast_fp16 = slice_by_index(begin = var_8485_begin_0, end = var_8485_end_0, end_mask = var_8485_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8485_cast_fp16")]; + tensor var_8497_begin_0 = const()[name = string("op_8497_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_8497_end_0 = const()[name = string("op_8497_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_8497_end_mask_0 = const()[name = string("op_8497_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8497_cast_fp16 = slice_by_index(begin = var_8497_begin_0, end = var_8497_end_0, end_mask = var_8497_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8497_cast_fp16")]; + tensor var_8501_begin_0 = const()[name = string("op_8501_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_8501_end_0 = const()[name = string("op_8501_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_8501_end_mask_0 = const()[name = string("op_8501_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8501_cast_fp16 = slice_by_index(begin = var_8501_begin_0, end = var_8501_end_0, end_mask = var_8501_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8501_cast_fp16")]; + tensor var_8513_begin_0 = const()[name = string("op_8513_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_8513_end_0 = const()[name = string("op_8513_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_8513_end_mask_0 = const()[name = string("op_8513_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8513_cast_fp16 = slice_by_index(begin = var_8513_begin_0, end = var_8513_end_0, end_mask = var_8513_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8513_cast_fp16")]; + tensor var_8517_begin_0 = const()[name = string("op_8517_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_8517_end_0 = const()[name = string("op_8517_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_8517_end_mask_0 = const()[name = string("op_8517_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8517_cast_fp16 = slice_by_index(begin = var_8517_begin_0, end = var_8517_end_0, end_mask = var_8517_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8517_cast_fp16")]; + tensor var_8529_begin_0 = const()[name = string("op_8529_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_8529_end_0 = const()[name = string("op_8529_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_8529_end_mask_0 = const()[name = string("op_8529_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8529_cast_fp16 = slice_by_index(begin = var_8529_begin_0, end = var_8529_end_0, end_mask = var_8529_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8529_cast_fp16")]; + tensor var_8533_begin_0 = const()[name = string("op_8533_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_8533_end_0 = const()[name = string("op_8533_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_8533_end_mask_0 = const()[name = string("op_8533_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8533_cast_fp16 = slice_by_index(begin = var_8533_begin_0, end = var_8533_end_0, end_mask = var_8533_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8533_cast_fp16")]; + tensor var_8545_begin_0 = const()[name = string("op_8545_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_8545_end_0 = const()[name = string("op_8545_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_8545_end_mask_0 = const()[name = string("op_8545_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8545_cast_fp16 = slice_by_index(begin = var_8545_begin_0, end = var_8545_end_0, end_mask = var_8545_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8545_cast_fp16")]; + tensor var_8549_begin_0 = const()[name = string("op_8549_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_8549_end_0 = const()[name = string("op_8549_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_8549_end_mask_0 = const()[name = string("op_8549_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8549_cast_fp16 = slice_by_index(begin = var_8549_begin_0, end = var_8549_end_0, end_mask = var_8549_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8549_cast_fp16")]; + tensor var_8561_begin_0 = const()[name = string("op_8561_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_8561_end_0 = const()[name = string("op_8561_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_8561_end_mask_0 = const()[name = string("op_8561_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8561_cast_fp16 = slice_by_index(begin = var_8561_begin_0, end = var_8561_end_0, end_mask = var_8561_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8561_cast_fp16")]; + tensor var_8565_begin_0 = const()[name = string("op_8565_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_8565_end_0 = const()[name = string("op_8565_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_8565_end_mask_0 = const()[name = string("op_8565_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8565_cast_fp16 = slice_by_index(begin = var_8565_begin_0, end = var_8565_end_0, end_mask = var_8565_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8565_cast_fp16")]; + tensor var_8577_begin_0 = const()[name = string("op_8577_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_8577_end_0 = const()[name = string("op_8577_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8577_end_mask_0 = const()[name = string("op_8577_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8577_cast_fp16 = slice_by_index(begin = var_8577_begin_0, end = var_8577_end_0, end_mask = var_8577_end_mask_0, x = key_heads_85_cast_fp16)[name = string("op_8577_cast_fp16")]; + tensor var_8581_begin_0 = const()[name = string("op_8581_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_8581_end_0 = const()[name = string("op_8581_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8581_end_mask_0 = const()[name = string("op_8581_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8581_cast_fp16 = slice_by_index(begin = var_8581_begin_0, end = var_8581_end_0, end_mask = var_8581_end_mask_0, x = value_heads_85_cast_fp16)[name = string("op_8581_cast_fp16")]; + bool key_heads_87_interleave_0 = const()[name = string("key_heads_87_interleave_0"), val = bool(false)]; + tensor key_heads_87_cast_fp16 = concat(axis = var_8307, interleave = key_heads_87_interleave_0, values = (var_8465_cast_fp16, var_8465_cast_fp16, var_8481_cast_fp16, var_8481_cast_fp16, var_8497_cast_fp16, var_8497_cast_fp16, var_8513_cast_fp16, var_8513_cast_fp16, var_8529_cast_fp16, var_8529_cast_fp16, var_8545_cast_fp16, var_8545_cast_fp16, var_8561_cast_fp16, var_8561_cast_fp16, var_8577_cast_fp16, var_8577_cast_fp16))[name = string("key_heads_87_cast_fp16")]; + bool value_heads_87_interleave_0 = const()[name = string("value_heads_87_interleave_0"), val = bool(false)]; + tensor value_heads_87_cast_fp16 = concat(axis = var_8307, interleave = value_heads_87_interleave_0, values = (var_8469_cast_fp16, var_8469_cast_fp16, var_8485_cast_fp16, var_8485_cast_fp16, var_8501_cast_fp16, var_8501_cast_fp16, var_8517_cast_fp16, var_8517_cast_fp16, var_8533_cast_fp16, var_8533_cast_fp16, var_8549_cast_fp16, var_8549_cast_fp16, var_8565_cast_fp16, var_8565_cast_fp16, var_8581_cast_fp16, var_8581_cast_fp16))[name = string("value_heads_87_cast_fp16")]; + fp16 var_8604_to_fp16 = const()[name = string("op_8604_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_8605_cast_fp16 = mul(x = mh_q_129_cast_fp16, y = var_8604_to_fp16)[name = string("op_8605_cast_fp16")]; + bool mh_w_85_transpose_x_0 = const()[name = string("mh_w_85_transpose_x_0"), val = bool(true)]; + bool mh_w_85_transpose_y_0 = const()[name = string("mh_w_85_transpose_y_0"), val = bool(false)]; + tensor mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_8605_cast_fp16, y = key_heads_87_cast_fp16)[name = string("mh_w_85_cast_fp16")]; + tensor mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_87_cast_fp16")]; + tensor var_8617_cast_fp16 = softmax(axis = var_8289, x = mh_w_87_cast_fp16)[name = string("op_8617_cast_fp16")]; + bool attn_43_transpose_x_0 = const()[name = string("attn_43_transpose_x_0"), val = bool(false)]; + bool attn_43_transpose_y_0 = const()[name = string("attn_43_transpose_y_0"), val = bool(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = value_heads_87_cast_fp16, y = var_8617_cast_fp16)[name = string("attn_43_cast_fp16")]; + tensor var_8622 = const()[name = string("op_8622"), val = tensor([1, -1, 1, 1])]; + tensor input_169_cast_fp16 = reshape(shape = var_8622, x = attn_43_cast_fp16)[name = string("input_169_cast_fp16")]; + string obj_179_pad_type_0 = const()[name = string("obj_179_pad_type_0"), val = string("valid")]; + tensor obj_179_strides_0 = const()[name = string("obj_179_strides_0"), val = tensor([1, 1])]; + tensor obj_179_pad_0 = const()[name = string("obj_179_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_179_dilations_0 = const()[name = string("obj_179_dilations_0"), val = tensor([1, 1])]; + int32 obj_179_groups_0 = const()[name = string("obj_179_groups_0"), val = int32(1)]; + tensor layers_21_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334770176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336867392))))[name = string("layers_21_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_179_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_179_dilations_0, groups = obj_179_groups_0, pad = obj_179_pad_0, pad_type = obj_179_pad_type_0, strides = obj_179_strides_0, weight = layers_21_self_attn_o_proj_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = string("obj_179_cast_fp16")]; + tensor inputs_173_cast_fp16 = add(x = inputs_167_cast_fp16, y = obj_179_cast_fp16)[name = string("inputs_173_cast_fp16")]; + tensor inputs_sq_175_cast_fp16 = mul(x = inputs_173_cast_fp16, y = inputs_173_cast_fp16)[name = string("inputs_sq_175_cast_fp16")]; + tensor variance_175_axes_0 = const()[name = string("variance_175_axes_0"), val = tensor([1])]; + bool variance_175_keep_dims_0 = const()[name = string("variance_175_keep_dims_0"), val = bool(true)]; + tensor variance_175_cast_fp16 = reduce_mean(axes = variance_175_axes_0, keep_dims = variance_175_keep_dims_0, x = inputs_sq_175_cast_fp16)[name = string("variance_175_cast_fp16")]; + fp16 var_8640_to_fp16 = const()[name = string("op_8640_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8641_cast_fp16 = add(x = variance_175_cast_fp16, y = var_8640_to_fp16)[name = string("op_8641_cast_fp16")]; + fp32 var_8642_epsilon_0 = const()[name = string("op_8642_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8642_cast_fp16 = rsqrt(epsilon = var_8642_epsilon_0, x = var_8641_cast_fp16)[name = string("op_8642_cast_fp16")]; + tensor hidden_states_217_cast_fp16 = mul(x = inputs_173_cast_fp16, y = var_8642_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; + tensor w_175_to_fp16 = const()[name = string("w_175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336867968)))]; + tensor input_171_cast_fp16 = mul(x = w_175_to_fp16, y = hidden_states_217_cast_fp16)[name = string("input_171_cast_fp16")]; + string input_173_pad_type_0 = const()[name = string("input_173_pad_type_0"), val = string("valid")]; + tensor input_173_strides_0 = const()[name = string("input_173_strides_0"), val = tensor([1, 1])]; + tensor input_173_pad_0 = const()[name = string("input_173_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_173_dilations_0 = const()[name = string("input_173_dilations_0"), val = tensor([1, 1])]; + int32 input_173_groups_0 = const()[name = string("input_173_groups_0"), val = int32(1)]; + tensor layers_21_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336870080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340015872))))[name = string("layers_21_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_173_cast_fp16 = conv(dilations = input_173_dilations_0, groups = input_173_groups_0, pad = input_173_pad_0, pad_type = input_173_pad_type_0, strides = input_173_strides_0, weight = layers_21_mlp_gate_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("input_173_cast_fp16")]; + tensor var_8656_cast_fp16 = silu(x = input_173_cast_fp16)[name = string("op_8656_cast_fp16")]; + string var_8662_pad_type_0 = const()[name = string("op_8662_pad_type_0"), val = string("valid")]; + tensor var_8662_strides_0 = const()[name = string("op_8662_strides_0"), val = tensor([1, 1])]; + tensor var_8662_pad_0 = const()[name = string("op_8662_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8662_dilations_0 = const()[name = string("op_8662_dilations_0"), val = tensor([1, 1])]; + int32 var_8662_groups_0 = const()[name = string("op_8662_groups_0"), val = int32(1)]; + tensor layers_21_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340016448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343162240))))[name = string("layers_21_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_8662_cast_fp16 = conv(dilations = var_8662_dilations_0, groups = var_8662_groups_0, pad = var_8662_pad_0, pad_type = var_8662_pad_type_0, strides = var_8662_strides_0, weight = layers_21_mlp_up_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("op_8662_cast_fp16")]; + tensor input_175_cast_fp16 = mul(x = var_8656_cast_fp16, y = var_8662_cast_fp16)[name = string("input_175_cast_fp16")]; + string hidden_states_219_pad_type_0 = const()[name = string("hidden_states_219_pad_type_0"), val = string("valid")]; + tensor hidden_states_219_strides_0 = const()[name = string("hidden_states_219_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_219_pad_0 = const()[name = string("hidden_states_219_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_219_dilations_0 = const()[name = string("hidden_states_219_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_219_groups_0 = const()[name = string("hidden_states_219_groups_0"), val = int32(1)]; + tensor layers_21_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343162816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346308608))))[name = string("layers_21_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_219_cast_fp16 = conv(dilations = hidden_states_219_dilations_0, groups = hidden_states_219_groups_0, pad = hidden_states_219_pad_0, pad_type = hidden_states_219_pad_type_0, strides = hidden_states_219_strides_0, weight = layers_21_mlp_down_proj_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; + tensor inputs_175_cast_fp16 = add(x = inputs_173_cast_fp16, y = hidden_states_219_cast_fp16)[name = string("inputs_175_cast_fp16")]; + int32 var_8676 = const()[name = string("op_8676"), val = int32(3)]; + int32 var_8686 = const()[name = string("op_8686"), val = int32(-2)]; + int32 var_8694 = const()[name = string("op_8694"), val = int32(1)]; + tensor inputs_sq_177_cast_fp16 = mul(x = inputs_175_cast_fp16, y = inputs_175_cast_fp16)[name = string("inputs_sq_177_cast_fp16")]; + tensor variance_177_axes_0 = const()[name = string("variance_177_axes_0"), val = tensor([1])]; + bool variance_177_keep_dims_0 = const()[name = string("variance_177_keep_dims_0"), val = bool(true)]; + tensor variance_177_cast_fp16 = reduce_mean(axes = variance_177_axes_0, keep_dims = variance_177_keep_dims_0, x = inputs_sq_177_cast_fp16)[name = string("variance_177_cast_fp16")]; + fp16 var_8706_to_fp16 = const()[name = string("op_8706_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8707_cast_fp16 = add(x = variance_177_cast_fp16, y = var_8706_to_fp16)[name = string("op_8707_cast_fp16")]; + fp32 var_8708_epsilon_0 = const()[name = string("op_8708_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8708_cast_fp16 = rsqrt(epsilon = var_8708_epsilon_0, x = var_8707_cast_fp16)[name = string("op_8708_cast_fp16")]; + tensor hidden_states_221_cast_fp16 = mul(x = inputs_175_cast_fp16, y = var_8708_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; + tensor w_177_to_fp16 = const()[name = string("w_177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346309184)))]; + tensor obj_181_cast_fp16 = mul(x = w_177_to_fp16, y = hidden_states_221_cast_fp16)[name = string("obj_181_cast_fp16")]; + string query_133_pad_type_0 = const()[name = string("query_133_pad_type_0"), val = string("valid")]; + tensor query_133_strides_0 = const()[name = string("query_133_strides_0"), val = tensor([1, 1])]; + tensor query_133_pad_0 = const()[name = string("query_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_133_dilations_0 = const()[name = string("query_133_dilations_0"), val = tensor([1, 1])]; + int32 query_133_groups_0 = const()[name = string("query_133_groups_0"), val = int32(1)]; + tensor layers_22_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346311296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348408512))))[name = string("layers_22_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_133_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_133_dilations_0, groups = query_133_groups_0, pad = query_133_pad_0, pad_type = query_133_pad_type_0, strides = query_133_strides_0, weight = layers_22_self_attn_q_proj_weight_to_fp16_palettized, x = obj_181_cast_fp16)[name = string("query_133_cast_fp16")]; + string current_key_89_pad_type_0 = const()[name = string("current_key_89_pad_type_0"), val = string("valid")]; + tensor current_key_89_strides_0 = const()[name = string("current_key_89_strides_0"), val = tensor([1, 1])]; + tensor current_key_89_pad_0 = const()[name = string("current_key_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_89_dilations_0 = const()[name = string("current_key_89_dilations_0"), val = tensor([1, 1])]; + int32 current_key_89_groups_0 = const()[name = string("current_key_89_groups_0"), val = int32(1)]; + tensor layers_22_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348409088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349457728))))[name = string("layers_22_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_89_cast_fp16 = conv(dilations = current_key_89_dilations_0, groups = current_key_89_groups_0, pad = current_key_89_pad_0, pad_type = current_key_89_pad_type_0, strides = current_key_89_strides_0, weight = layers_22_self_attn_k_proj_weight_to_fp16_palettized, x = obj_181_cast_fp16)[name = string("current_key_89_cast_fp16")]; + string current_value_45_pad_type_0 = const()[name = string("current_value_45_pad_type_0"), val = string("valid")]; + tensor current_value_45_strides_0 = const()[name = string("current_value_45_strides_0"), val = tensor([1, 1])]; + tensor current_value_45_pad_0 = const()[name = string("current_value_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_45_dilations_0 = const()[name = string("current_value_45_dilations_0"), val = tensor([1, 1])]; + int32 current_value_45_groups_0 = const()[name = string("current_value_45_groups_0"), val = int32(1)]; + tensor layers_22_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349458304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350506944))))[name = string("layers_22_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_45_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_45_dilations_0, groups = current_value_45_groups_0, pad = current_value_45_pad_0, pad_type = current_value_45_pad_type_0, strides = current_value_45_strides_0, weight = layers_22_self_attn_v_proj_weight_to_fp16_palettized, x = obj_181_cast_fp16)[name = string("current_value_45_cast_fp16")]; + tensor var_8745 = const()[name = string("op_8745"), val = tensor([16, 128, 1, 1])]; + tensor inputs_177_cast_fp16 = reshape(shape = var_8745, x = query_133_cast_fp16)[name = string("inputs_177_cast_fp16")]; + tensor inputs_sq_179_cast_fp16 = mul(x = inputs_177_cast_fp16, y = inputs_177_cast_fp16)[name = string("inputs_sq_179_cast_fp16")]; + tensor variance_179_axes_0 = const()[name = string("variance_179_axes_0"), val = tensor([1])]; + bool variance_179_keep_dims_0 = const()[name = string("variance_179_keep_dims_0"), val = bool(true)]; + tensor variance_179_cast_fp16 = reduce_mean(axes = variance_179_axes_0, keep_dims = variance_179_keep_dims_0, x = inputs_sq_179_cast_fp16)[name = string("variance_179_cast_fp16")]; + fp16 var_8751_to_fp16 = const()[name = string("op_8751_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8752_cast_fp16 = add(x = variance_179_cast_fp16, y = var_8751_to_fp16)[name = string("op_8752_cast_fp16")]; + fp32 var_8753_epsilon_0 = const()[name = string("op_8753_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8753_cast_fp16 = rsqrt(epsilon = var_8753_epsilon_0, x = var_8752_cast_fp16)[name = string("op_8753_cast_fp16")]; + tensor hidden_states_223_cast_fp16 = mul(x = inputs_177_cast_fp16, y = var_8753_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; + tensor w_179_to_fp16 = const()[name = string("w_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350507520)))]; + tensor query_normed_45_cast_fp16 = mul(x = w_179_to_fp16, y = hidden_states_223_cast_fp16)[name = string("query_normed_45_cast_fp16")]; + tensor var_8761 = const()[name = string("op_8761"), val = tensor([8, 128, 1, 1])]; + tensor inputs_179_cast_fp16 = reshape(shape = var_8761, x = current_key_89_cast_fp16)[name = string("inputs_179_cast_fp16")]; + tensor inputs_sq_181_cast_fp16 = mul(x = inputs_179_cast_fp16, y = inputs_179_cast_fp16)[name = string("inputs_sq_181_cast_fp16")]; + tensor variance_181_axes_0 = const()[name = string("variance_181_axes_0"), val = tensor([1])]; + bool variance_181_keep_dims_0 = const()[name = string("variance_181_keep_dims_0"), val = bool(true)]; + tensor variance_181_cast_fp16 = reduce_mean(axes = variance_181_axes_0, keep_dims = variance_181_keep_dims_0, x = inputs_sq_181_cast_fp16)[name = string("variance_181_cast_fp16")]; + fp16 var_8767_to_fp16 = const()[name = string("op_8767_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_8768_cast_fp16 = add(x = variance_181_cast_fp16, y = var_8767_to_fp16)[name = string("op_8768_cast_fp16")]; + fp32 var_8769_epsilon_0 = const()[name = string("op_8769_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_8769_cast_fp16 = rsqrt(epsilon = var_8769_epsilon_0, x = var_8768_cast_fp16)[name = string("op_8769_cast_fp16")]; + tensor hidden_states_225_cast_fp16 = mul(x = inputs_179_cast_fp16, y = var_8769_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; + tensor w_181_to_fp16 = const()[name = string("w_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350507840)))]; + tensor current_key_normed_45_cast_fp16 = mul(x = w_181_to_fp16, y = hidden_states_225_cast_fp16)[name = string("current_key_normed_45_cast_fp16")]; + tensor var_8787 = const()[name = string("op_8787"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_133_cast_fp16 = reshape(shape = var_8787, x = query_normed_45_cast_fp16)[name = string("mh_q_133_cast_fp16")]; + tensor var_8789 = const()[name = string("op_8789"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_89_cast_fp16 = reshape(shape = var_8789, x = current_key_normed_45_cast_fp16)[name = string("mh_k_89_cast_fp16")]; + tensor var_8793_cast_fp16 = mul(x = mh_q_133_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8793_cast_fp16")]; + tensor var_8798_begin_0 = const()[name = string("op_8798_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8798_end_0 = const()[name = string("op_8798_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_8798_end_mask_0 = const()[name = string("op_8798_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_8798_cast_fp16 = slice_by_index(begin = var_8798_begin_0, end = var_8798_end_0, end_mask = var_8798_end_mask_0, x = mh_q_133_cast_fp16)[name = string("op_8798_cast_fp16")]; + tensor var_8804_begin_0 = const()[name = string("op_8804_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8804_end_0 = const()[name = string("op_8804_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_8804_end_mask_0 = const()[name = string("op_8804_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8804_cast_fp16 = slice_by_index(begin = var_8804_begin_0, end = var_8804_end_0, end_mask = var_8804_end_mask_0, x = mh_q_133_cast_fp16)[name = string("op_8804_cast_fp16")]; + fp16 const_523_promoted_to_fp16 = const()[name = string("const_523_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8806_cast_fp16 = mul(x = var_8804_cast_fp16, y = const_523_promoted_to_fp16)[name = string("op_8806_cast_fp16")]; + bool var_8808_interleave_0 = const()[name = string("op_8808_interleave_0"), val = bool(false)]; + tensor var_8808_cast_fp16 = concat(axis = var_8686, interleave = var_8808_interleave_0, values = (var_8806_cast_fp16, var_8798_cast_fp16))[name = string("op_8808_cast_fp16")]; + tensor var_8809_cast_fp16 = mul(x = var_8808_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8809_cast_fp16")]; + tensor mh_q_135_cast_fp16 = add(x = var_8793_cast_fp16, y = var_8809_cast_fp16)[name = string("mh_q_135_cast_fp16")]; + tensor var_8811_cast_fp16 = mul(x = mh_k_89_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8811_cast_fp16")]; + tensor var_8816_begin_0 = const()[name = string("op_8816_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8816_end_0 = const()[name = string("op_8816_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_8816_end_mask_0 = const()[name = string("op_8816_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_8816_cast_fp16 = slice_by_index(begin = var_8816_begin_0, end = var_8816_end_0, end_mask = var_8816_end_mask_0, x = mh_k_89_cast_fp16)[name = string("op_8816_cast_fp16")]; + tensor var_8822_begin_0 = const()[name = string("op_8822_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_8822_end_0 = const()[name = string("op_8822_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_8822_end_mask_0 = const()[name = string("op_8822_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8822_cast_fp16 = slice_by_index(begin = var_8822_begin_0, end = var_8822_end_0, end_mask = var_8822_end_mask_0, x = mh_k_89_cast_fp16)[name = string("op_8822_cast_fp16")]; + fp16 const_526_promoted_to_fp16 = const()[name = string("const_526_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8824_cast_fp16 = mul(x = var_8822_cast_fp16, y = const_526_promoted_to_fp16)[name = string("op_8824_cast_fp16")]; + bool var_8826_interleave_0 = const()[name = string("op_8826_interleave_0"), val = bool(false)]; + tensor var_8826_cast_fp16 = concat(axis = var_8686, interleave = var_8826_interleave_0, values = (var_8824_cast_fp16, var_8816_cast_fp16))[name = string("op_8826_cast_fp16")]; + tensor var_8827_cast_fp16 = mul(x = var_8826_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8827_cast_fp16")]; + tensor mh_k_91_cast_fp16 = add(x = var_8811_cast_fp16, y = var_8827_cast_fp16)[name = string("mh_k_91_cast_fp16")]; + tensor var_8831 = const()[name = string("op_8831"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_91_cast_fp16 = reshape(shape = var_8831, x = mh_k_91_cast_fp16)[name = string("current_key_91_cast_fp16")]; + tensor var_8838_cast_fp16 = mul(x = var_101_cast_fp16_22, y = var_323_cast_fp16)[name = string("op_8838_cast_fp16")]; + tensor var_8839_cast_fp16 = mul(x = current_key_91_cast_fp16, y = var_321_cast_fp16)[name = string("op_8839_cast_fp16")]; + tensor key_135_cast_fp16 = add(x = var_8838_cast_fp16, y = var_8839_cast_fp16)[name = string("key_135_cast_fp16")]; + tensor var_8842_cast_fp16 = mul(x = var_132_cast_fp16_22, y = var_323_cast_fp16)[name = string("op_8842_cast_fp16")]; + tensor var_8843_cast_fp16 = mul(x = current_value_45_cast_fp16, y = var_321_cast_fp16)[name = string("op_8843_cast_fp16")]; + tensor value_89_cast_fp16 = add(x = var_8842_cast_fp16, y = var_8843_cast_fp16)[name = string("value_89_cast_fp16")]; + tensor var_8847 = const()[name = string("op_8847"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_89_cast_fp16 = reshape(shape = var_8847, x = key_135_cast_fp16)[name = string("key_heads_89_cast_fp16")]; + tensor var_8849 = const()[name = string("op_8849"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_89_cast_fp16 = reshape(shape = var_8849, x = value_89_cast_fp16)[name = string("value_heads_89_cast_fp16")]; + tensor var_8852_begin_0 = const()[name = string("op_8852_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8852_end_0 = const()[name = string("op_8852_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8852_end_mask_0 = const()[name = string("op_8852_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8852_cast_fp16 = slice_by_index(begin = var_8852_begin_0, end = var_8852_end_0, end_mask = var_8852_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8852_cast_fp16")]; + tensor var_8856_begin_0 = const()[name = string("op_8856_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8856_end_0 = const()[name = string("op_8856_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8856_end_mask_0 = const()[name = string("op_8856_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8856_cast_fp16 = slice_by_index(begin = var_8856_begin_0, end = var_8856_end_0, end_mask = var_8856_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8856_cast_fp16")]; + tensor var_8868_begin_0 = const()[name = string("op_8868_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_8868_end_0 = const()[name = string("op_8868_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_8868_end_mask_0 = const()[name = string("op_8868_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8868_cast_fp16 = slice_by_index(begin = var_8868_begin_0, end = var_8868_end_0, end_mask = var_8868_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8868_cast_fp16")]; + tensor var_8872_begin_0 = const()[name = string("op_8872_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_8872_end_0 = const()[name = string("op_8872_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_8872_end_mask_0 = const()[name = string("op_8872_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8872_cast_fp16 = slice_by_index(begin = var_8872_begin_0, end = var_8872_end_0, end_mask = var_8872_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8872_cast_fp16")]; + tensor var_8884_begin_0 = const()[name = string("op_8884_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_8884_end_0 = const()[name = string("op_8884_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_8884_end_mask_0 = const()[name = string("op_8884_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8884_cast_fp16 = slice_by_index(begin = var_8884_begin_0, end = var_8884_end_0, end_mask = var_8884_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8884_cast_fp16")]; + tensor var_8888_begin_0 = const()[name = string("op_8888_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_8888_end_0 = const()[name = string("op_8888_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_8888_end_mask_0 = const()[name = string("op_8888_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8888_cast_fp16 = slice_by_index(begin = var_8888_begin_0, end = var_8888_end_0, end_mask = var_8888_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8888_cast_fp16")]; + tensor var_8900_begin_0 = const()[name = string("op_8900_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_8900_end_0 = const()[name = string("op_8900_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_8900_end_mask_0 = const()[name = string("op_8900_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8900_cast_fp16 = slice_by_index(begin = var_8900_begin_0, end = var_8900_end_0, end_mask = var_8900_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8900_cast_fp16")]; + tensor var_8904_begin_0 = const()[name = string("op_8904_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_8904_end_0 = const()[name = string("op_8904_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_8904_end_mask_0 = const()[name = string("op_8904_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8904_cast_fp16 = slice_by_index(begin = var_8904_begin_0, end = var_8904_end_0, end_mask = var_8904_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8904_cast_fp16")]; + tensor var_8916_begin_0 = const()[name = string("op_8916_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_8916_end_0 = const()[name = string("op_8916_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_8916_end_mask_0 = const()[name = string("op_8916_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8916_cast_fp16 = slice_by_index(begin = var_8916_begin_0, end = var_8916_end_0, end_mask = var_8916_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8916_cast_fp16")]; + tensor var_8920_begin_0 = const()[name = string("op_8920_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_8920_end_0 = const()[name = string("op_8920_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_8920_end_mask_0 = const()[name = string("op_8920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8920_cast_fp16 = slice_by_index(begin = var_8920_begin_0, end = var_8920_end_0, end_mask = var_8920_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8920_cast_fp16")]; + tensor var_8932_begin_0 = const()[name = string("op_8932_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_8932_end_0 = const()[name = string("op_8932_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_8932_end_mask_0 = const()[name = string("op_8932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8932_cast_fp16 = slice_by_index(begin = var_8932_begin_0, end = var_8932_end_0, end_mask = var_8932_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8932_cast_fp16")]; + tensor var_8936_begin_0 = const()[name = string("op_8936_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_8936_end_0 = const()[name = string("op_8936_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_8936_end_mask_0 = const()[name = string("op_8936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8936_cast_fp16 = slice_by_index(begin = var_8936_begin_0, end = var_8936_end_0, end_mask = var_8936_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8936_cast_fp16")]; + tensor var_8948_begin_0 = const()[name = string("op_8948_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_8948_end_0 = const()[name = string("op_8948_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_8948_end_mask_0 = const()[name = string("op_8948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8948_cast_fp16 = slice_by_index(begin = var_8948_begin_0, end = var_8948_end_0, end_mask = var_8948_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8948_cast_fp16")]; + tensor var_8952_begin_0 = const()[name = string("op_8952_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_8952_end_0 = const()[name = string("op_8952_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_8952_end_mask_0 = const()[name = string("op_8952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8952_cast_fp16 = slice_by_index(begin = var_8952_begin_0, end = var_8952_end_0, end_mask = var_8952_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8952_cast_fp16")]; + tensor var_8964_begin_0 = const()[name = string("op_8964_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_8964_end_0 = const()[name = string("op_8964_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8964_end_mask_0 = const()[name = string("op_8964_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8964_cast_fp16 = slice_by_index(begin = var_8964_begin_0, end = var_8964_end_0, end_mask = var_8964_end_mask_0, x = key_heads_89_cast_fp16)[name = string("op_8964_cast_fp16")]; + tensor var_8968_begin_0 = const()[name = string("op_8968_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_8968_end_0 = const()[name = string("op_8968_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_8968_end_mask_0 = const()[name = string("op_8968_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_8968_cast_fp16 = slice_by_index(begin = var_8968_begin_0, end = var_8968_end_0, end_mask = var_8968_end_mask_0, x = value_heads_89_cast_fp16)[name = string("op_8968_cast_fp16")]; + bool key_heads_91_interleave_0 = const()[name = string("key_heads_91_interleave_0"), val = bool(false)]; + tensor key_heads_91_cast_fp16 = concat(axis = var_8694, interleave = key_heads_91_interleave_0, values = (var_8852_cast_fp16, var_8852_cast_fp16, var_8868_cast_fp16, var_8868_cast_fp16, var_8884_cast_fp16, var_8884_cast_fp16, var_8900_cast_fp16, var_8900_cast_fp16, var_8916_cast_fp16, var_8916_cast_fp16, var_8932_cast_fp16, var_8932_cast_fp16, var_8948_cast_fp16, var_8948_cast_fp16, var_8964_cast_fp16, var_8964_cast_fp16))[name = string("key_heads_91_cast_fp16")]; + bool value_heads_91_interleave_0 = const()[name = string("value_heads_91_interleave_0"), val = bool(false)]; + tensor value_heads_91_cast_fp16 = concat(axis = var_8694, interleave = value_heads_91_interleave_0, values = (var_8856_cast_fp16, var_8856_cast_fp16, var_8872_cast_fp16, var_8872_cast_fp16, var_8888_cast_fp16, var_8888_cast_fp16, var_8904_cast_fp16, var_8904_cast_fp16, var_8920_cast_fp16, var_8920_cast_fp16, var_8936_cast_fp16, var_8936_cast_fp16, var_8952_cast_fp16, var_8952_cast_fp16, var_8968_cast_fp16, var_8968_cast_fp16))[name = string("value_heads_91_cast_fp16")]; + fp16 var_8991_to_fp16 = const()[name = string("op_8991_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_8992_cast_fp16 = mul(x = mh_q_135_cast_fp16, y = var_8991_to_fp16)[name = string("op_8992_cast_fp16")]; + bool mh_w_89_transpose_x_0 = const()[name = string("mh_w_89_transpose_x_0"), val = bool(true)]; + bool mh_w_89_transpose_y_0 = const()[name = string("mh_w_89_transpose_y_0"), val = bool(false)]; + tensor mh_w_89_cast_fp16 = matmul(transpose_x = mh_w_89_transpose_x_0, transpose_y = mh_w_89_transpose_y_0, x = var_8992_cast_fp16, y = key_heads_91_cast_fp16)[name = string("mh_w_89_cast_fp16")]; + tensor mh_w_91_cast_fp16 = add(x = mh_w_89_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_91_cast_fp16")]; + tensor var_9004_cast_fp16 = softmax(axis = var_8676, x = mh_w_91_cast_fp16)[name = string("op_9004_cast_fp16")]; + bool attn_45_transpose_x_0 = const()[name = string("attn_45_transpose_x_0"), val = bool(false)]; + bool attn_45_transpose_y_0 = const()[name = string("attn_45_transpose_y_0"), val = bool(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = value_heads_91_cast_fp16, y = var_9004_cast_fp16)[name = string("attn_45_cast_fp16")]; + tensor var_9009 = const()[name = string("op_9009"), val = tensor([1, -1, 1, 1])]; + tensor input_177_cast_fp16 = reshape(shape = var_9009, x = attn_45_cast_fp16)[name = string("input_177_cast_fp16")]; + string obj_187_pad_type_0 = const()[name = string("obj_187_pad_type_0"), val = string("valid")]; + tensor obj_187_strides_0 = const()[name = string("obj_187_strides_0"), val = tensor([1, 1])]; + tensor obj_187_pad_0 = const()[name = string("obj_187_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_187_dilations_0 = const()[name = string("obj_187_dilations_0"), val = tensor([1, 1])]; + int32 obj_187_groups_0 = const()[name = string("obj_187_groups_0"), val = int32(1)]; + tensor layers_22_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350508160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352605376))))[name = string("layers_22_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_187_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_187_dilations_0, groups = obj_187_groups_0, pad = obj_187_pad_0, pad_type = obj_187_pad_type_0, strides = obj_187_strides_0, weight = layers_22_self_attn_o_proj_weight_to_fp16_palettized, x = input_177_cast_fp16)[name = string("obj_187_cast_fp16")]; + tensor inputs_181_cast_fp16 = add(x = inputs_175_cast_fp16, y = obj_187_cast_fp16)[name = string("inputs_181_cast_fp16")]; + tensor inputs_sq_183_cast_fp16 = mul(x = inputs_181_cast_fp16, y = inputs_181_cast_fp16)[name = string("inputs_sq_183_cast_fp16")]; + tensor variance_183_axes_0 = const()[name = string("variance_183_axes_0"), val = tensor([1])]; + bool variance_183_keep_dims_0 = const()[name = string("variance_183_keep_dims_0"), val = bool(true)]; + tensor variance_183_cast_fp16 = reduce_mean(axes = variance_183_axes_0, keep_dims = variance_183_keep_dims_0, x = inputs_sq_183_cast_fp16)[name = string("variance_183_cast_fp16")]; + fp16 var_9027_to_fp16 = const()[name = string("op_9027_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9028_cast_fp16 = add(x = variance_183_cast_fp16, y = var_9027_to_fp16)[name = string("op_9028_cast_fp16")]; + fp32 var_9029_epsilon_0 = const()[name = string("op_9029_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9029_cast_fp16 = rsqrt(epsilon = var_9029_epsilon_0, x = var_9028_cast_fp16)[name = string("op_9029_cast_fp16")]; + tensor hidden_states_227_cast_fp16 = mul(x = inputs_181_cast_fp16, y = var_9029_cast_fp16)[name = string("hidden_states_227_cast_fp16")]; + tensor w_183_to_fp16 = const()[name = string("w_183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352605952)))]; + tensor input_179_cast_fp16 = mul(x = w_183_to_fp16, y = hidden_states_227_cast_fp16)[name = string("input_179_cast_fp16")]; + string input_181_pad_type_0 = const()[name = string("input_181_pad_type_0"), val = string("valid")]; + tensor input_181_strides_0 = const()[name = string("input_181_strides_0"), val = tensor([1, 1])]; + tensor input_181_pad_0 = const()[name = string("input_181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_181_dilations_0 = const()[name = string("input_181_dilations_0"), val = tensor([1, 1])]; + int32 input_181_groups_0 = const()[name = string("input_181_groups_0"), val = int32(1)]; + tensor layers_22_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352608064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355753856))))[name = string("layers_22_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_181_cast_fp16 = conv(dilations = input_181_dilations_0, groups = input_181_groups_0, pad = input_181_pad_0, pad_type = input_181_pad_type_0, strides = input_181_strides_0, weight = layers_22_mlp_gate_proj_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = string("input_181_cast_fp16")]; + tensor var_9043_cast_fp16 = silu(x = input_181_cast_fp16)[name = string("op_9043_cast_fp16")]; + string var_9049_pad_type_0 = const()[name = string("op_9049_pad_type_0"), val = string("valid")]; + tensor var_9049_strides_0 = const()[name = string("op_9049_strides_0"), val = tensor([1, 1])]; + tensor var_9049_pad_0 = const()[name = string("op_9049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9049_dilations_0 = const()[name = string("op_9049_dilations_0"), val = tensor([1, 1])]; + int32 var_9049_groups_0 = const()[name = string("op_9049_groups_0"), val = int32(1)]; + tensor layers_22_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355754432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358900224))))[name = string("layers_22_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_9049_cast_fp16 = conv(dilations = var_9049_dilations_0, groups = var_9049_groups_0, pad = var_9049_pad_0, pad_type = var_9049_pad_type_0, strides = var_9049_strides_0, weight = layers_22_mlp_up_proj_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = string("op_9049_cast_fp16")]; + tensor input_183_cast_fp16 = mul(x = var_9043_cast_fp16, y = var_9049_cast_fp16)[name = string("input_183_cast_fp16")]; + string hidden_states_229_pad_type_0 = const()[name = string("hidden_states_229_pad_type_0"), val = string("valid")]; + tensor hidden_states_229_strides_0 = const()[name = string("hidden_states_229_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_229_pad_0 = const()[name = string("hidden_states_229_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_229_dilations_0 = const()[name = string("hidden_states_229_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_229_groups_0 = const()[name = string("hidden_states_229_groups_0"), val = int32(1)]; + tensor layers_22_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358900800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362046592))))[name = string("layers_22_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_229_cast_fp16 = conv(dilations = hidden_states_229_dilations_0, groups = hidden_states_229_groups_0, pad = hidden_states_229_pad_0, pad_type = hidden_states_229_pad_type_0, strides = hidden_states_229_strides_0, weight = layers_22_mlp_down_proj_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + tensor inputs_183_cast_fp16 = add(x = inputs_181_cast_fp16, y = hidden_states_229_cast_fp16)[name = string("inputs_183_cast_fp16")]; + int32 var_9063 = const()[name = string("op_9063"), val = int32(3)]; + int32 var_9073 = const()[name = string("op_9073"), val = int32(-2)]; + int32 var_9081 = const()[name = string("op_9081"), val = int32(1)]; + tensor inputs_sq_185_cast_fp16 = mul(x = inputs_183_cast_fp16, y = inputs_183_cast_fp16)[name = string("inputs_sq_185_cast_fp16")]; + tensor variance_185_axes_0 = const()[name = string("variance_185_axes_0"), val = tensor([1])]; + bool variance_185_keep_dims_0 = const()[name = string("variance_185_keep_dims_0"), val = bool(true)]; + tensor variance_185_cast_fp16 = reduce_mean(axes = variance_185_axes_0, keep_dims = variance_185_keep_dims_0, x = inputs_sq_185_cast_fp16)[name = string("variance_185_cast_fp16")]; + fp16 var_9093_to_fp16 = const()[name = string("op_9093_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9094_cast_fp16 = add(x = variance_185_cast_fp16, y = var_9093_to_fp16)[name = string("op_9094_cast_fp16")]; + fp32 var_9095_epsilon_0 = const()[name = string("op_9095_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9095_cast_fp16 = rsqrt(epsilon = var_9095_epsilon_0, x = var_9094_cast_fp16)[name = string("op_9095_cast_fp16")]; + tensor hidden_states_231_cast_fp16 = mul(x = inputs_183_cast_fp16, y = var_9095_cast_fp16)[name = string("hidden_states_231_cast_fp16")]; + tensor w_185_to_fp16 = const()[name = string("w_185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362047168)))]; + tensor obj_189_cast_fp16 = mul(x = w_185_to_fp16, y = hidden_states_231_cast_fp16)[name = string("obj_189_cast_fp16")]; + string query_139_pad_type_0 = const()[name = string("query_139_pad_type_0"), val = string("valid")]; + tensor query_139_strides_0 = const()[name = string("query_139_strides_0"), val = tensor([1, 1])]; + tensor query_139_pad_0 = const()[name = string("query_139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_139_dilations_0 = const()[name = string("query_139_dilations_0"), val = tensor([1, 1])]; + int32 query_139_groups_0 = const()[name = string("query_139_groups_0"), val = int32(1)]; + tensor layers_23_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362049280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364146496))))[name = string("layers_23_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_139_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_139_dilations_0, groups = query_139_groups_0, pad = query_139_pad_0, pad_type = query_139_pad_type_0, strides = query_139_strides_0, weight = layers_23_self_attn_q_proj_weight_to_fp16_palettized, x = obj_189_cast_fp16)[name = string("query_139_cast_fp16")]; + string current_key_93_pad_type_0 = const()[name = string("current_key_93_pad_type_0"), val = string("valid")]; + tensor current_key_93_strides_0 = const()[name = string("current_key_93_strides_0"), val = tensor([1, 1])]; + tensor current_key_93_pad_0 = const()[name = string("current_key_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_93_dilations_0 = const()[name = string("current_key_93_dilations_0"), val = tensor([1, 1])]; + int32 current_key_93_groups_0 = const()[name = string("current_key_93_groups_0"), val = int32(1)]; + tensor layers_23_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364147072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365195712))))[name = string("layers_23_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_93_cast_fp16 = conv(dilations = current_key_93_dilations_0, groups = current_key_93_groups_0, pad = current_key_93_pad_0, pad_type = current_key_93_pad_type_0, strides = current_key_93_strides_0, weight = layers_23_self_attn_k_proj_weight_to_fp16_palettized, x = obj_189_cast_fp16)[name = string("current_key_93_cast_fp16")]; + string current_value_47_pad_type_0 = const()[name = string("current_value_47_pad_type_0"), val = string("valid")]; + tensor current_value_47_strides_0 = const()[name = string("current_value_47_strides_0"), val = tensor([1, 1])]; + tensor current_value_47_pad_0 = const()[name = string("current_value_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_47_dilations_0 = const()[name = string("current_value_47_dilations_0"), val = tensor([1, 1])]; + int32 current_value_47_groups_0 = const()[name = string("current_value_47_groups_0"), val = int32(1)]; + tensor layers_23_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365196288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366244928))))[name = string("layers_23_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_47_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_47_dilations_0, groups = current_value_47_groups_0, pad = current_value_47_pad_0, pad_type = current_value_47_pad_type_0, strides = current_value_47_strides_0, weight = layers_23_self_attn_v_proj_weight_to_fp16_palettized, x = obj_189_cast_fp16)[name = string("current_value_47_cast_fp16")]; + tensor var_9132 = const()[name = string("op_9132"), val = tensor([16, 128, 1, 1])]; + tensor inputs_185_cast_fp16 = reshape(shape = var_9132, x = query_139_cast_fp16)[name = string("inputs_185_cast_fp16")]; + tensor inputs_sq_187_cast_fp16 = mul(x = inputs_185_cast_fp16, y = inputs_185_cast_fp16)[name = string("inputs_sq_187_cast_fp16")]; + tensor variance_187_axes_0 = const()[name = string("variance_187_axes_0"), val = tensor([1])]; + bool variance_187_keep_dims_0 = const()[name = string("variance_187_keep_dims_0"), val = bool(true)]; + tensor variance_187_cast_fp16 = reduce_mean(axes = variance_187_axes_0, keep_dims = variance_187_keep_dims_0, x = inputs_sq_187_cast_fp16)[name = string("variance_187_cast_fp16")]; + fp16 var_9138_to_fp16 = const()[name = string("op_9138_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9139_cast_fp16 = add(x = variance_187_cast_fp16, y = var_9138_to_fp16)[name = string("op_9139_cast_fp16")]; + fp32 var_9140_epsilon_0 = const()[name = string("op_9140_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9140_cast_fp16 = rsqrt(epsilon = var_9140_epsilon_0, x = var_9139_cast_fp16)[name = string("op_9140_cast_fp16")]; + tensor hidden_states_233_cast_fp16 = mul(x = inputs_185_cast_fp16, y = var_9140_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; + tensor w_187_to_fp16 = const()[name = string("w_187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366245504)))]; + tensor query_normed_47_cast_fp16 = mul(x = w_187_to_fp16, y = hidden_states_233_cast_fp16)[name = string("query_normed_47_cast_fp16")]; + tensor var_9148 = const()[name = string("op_9148"), val = tensor([8, 128, 1, 1])]; + tensor inputs_187_cast_fp16 = reshape(shape = var_9148, x = current_key_93_cast_fp16)[name = string("inputs_187_cast_fp16")]; + tensor inputs_sq_189_cast_fp16 = mul(x = inputs_187_cast_fp16, y = inputs_187_cast_fp16)[name = string("inputs_sq_189_cast_fp16")]; + tensor variance_189_axes_0 = const()[name = string("variance_189_axes_0"), val = tensor([1])]; + bool variance_189_keep_dims_0 = const()[name = string("variance_189_keep_dims_0"), val = bool(true)]; + tensor variance_189_cast_fp16 = reduce_mean(axes = variance_189_axes_0, keep_dims = variance_189_keep_dims_0, x = inputs_sq_189_cast_fp16)[name = string("variance_189_cast_fp16")]; + fp16 var_9154_to_fp16 = const()[name = string("op_9154_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9155_cast_fp16 = add(x = variance_189_cast_fp16, y = var_9154_to_fp16)[name = string("op_9155_cast_fp16")]; + fp32 var_9156_epsilon_0 = const()[name = string("op_9156_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9156_cast_fp16 = rsqrt(epsilon = var_9156_epsilon_0, x = var_9155_cast_fp16)[name = string("op_9156_cast_fp16")]; + tensor hidden_states_235_cast_fp16 = mul(x = inputs_187_cast_fp16, y = var_9156_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; + tensor w_189_to_fp16 = const()[name = string("w_189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366245824)))]; + tensor current_key_normed_47_cast_fp16 = mul(x = w_189_to_fp16, y = hidden_states_235_cast_fp16)[name = string("current_key_normed_47_cast_fp16")]; + tensor var_9174 = const()[name = string("op_9174"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_139_cast_fp16 = reshape(shape = var_9174, x = query_normed_47_cast_fp16)[name = string("mh_q_139_cast_fp16")]; + tensor var_9176 = const()[name = string("op_9176"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_93_cast_fp16 = reshape(shape = var_9176, x = current_key_normed_47_cast_fp16)[name = string("mh_k_93_cast_fp16")]; + tensor var_9180_cast_fp16 = mul(x = mh_q_139_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9180_cast_fp16")]; + tensor var_9185_begin_0 = const()[name = string("op_9185_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9185_end_0 = const()[name = string("op_9185_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_9185_end_mask_0 = const()[name = string("op_9185_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_9185_cast_fp16 = slice_by_index(begin = var_9185_begin_0, end = var_9185_end_0, end_mask = var_9185_end_mask_0, x = mh_q_139_cast_fp16)[name = string("op_9185_cast_fp16")]; + tensor var_9191_begin_0 = const()[name = string("op_9191_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9191_end_0 = const()[name = string("op_9191_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_9191_end_mask_0 = const()[name = string("op_9191_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9191_cast_fp16 = slice_by_index(begin = var_9191_begin_0, end = var_9191_end_0, end_mask = var_9191_end_mask_0, x = mh_q_139_cast_fp16)[name = string("op_9191_cast_fp16")]; + fp16 const_546_promoted_to_fp16 = const()[name = string("const_546_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9193_cast_fp16 = mul(x = var_9191_cast_fp16, y = const_546_promoted_to_fp16)[name = string("op_9193_cast_fp16")]; + bool var_9195_interleave_0 = const()[name = string("op_9195_interleave_0"), val = bool(false)]; + tensor var_9195_cast_fp16 = concat(axis = var_9073, interleave = var_9195_interleave_0, values = (var_9193_cast_fp16, var_9185_cast_fp16))[name = string("op_9195_cast_fp16")]; + tensor var_9196_cast_fp16 = mul(x = var_9195_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9196_cast_fp16")]; + tensor mh_q_141_cast_fp16 = add(x = var_9180_cast_fp16, y = var_9196_cast_fp16)[name = string("mh_q_141_cast_fp16")]; + tensor var_9198_cast_fp16 = mul(x = mh_k_93_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9198_cast_fp16")]; + tensor var_9203_begin_0 = const()[name = string("op_9203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9203_end_0 = const()[name = string("op_9203_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_9203_end_mask_0 = const()[name = string("op_9203_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_9203_cast_fp16 = slice_by_index(begin = var_9203_begin_0, end = var_9203_end_0, end_mask = var_9203_end_mask_0, x = mh_k_93_cast_fp16)[name = string("op_9203_cast_fp16")]; + tensor var_9209_begin_0 = const()[name = string("op_9209_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9209_end_0 = const()[name = string("op_9209_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_9209_end_mask_0 = const()[name = string("op_9209_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9209_cast_fp16 = slice_by_index(begin = var_9209_begin_0, end = var_9209_end_0, end_mask = var_9209_end_mask_0, x = mh_k_93_cast_fp16)[name = string("op_9209_cast_fp16")]; + fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9211_cast_fp16 = mul(x = var_9209_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_9211_cast_fp16")]; + bool var_9213_interleave_0 = const()[name = string("op_9213_interleave_0"), val = bool(false)]; + tensor var_9213_cast_fp16 = concat(axis = var_9073, interleave = var_9213_interleave_0, values = (var_9211_cast_fp16, var_9203_cast_fp16))[name = string("op_9213_cast_fp16")]; + tensor var_9214_cast_fp16 = mul(x = var_9213_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9214_cast_fp16")]; + tensor mh_k_95_cast_fp16 = add(x = var_9198_cast_fp16, y = var_9214_cast_fp16)[name = string("mh_k_95_cast_fp16")]; + tensor var_9218 = const()[name = string("op_9218"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_95_cast_fp16 = reshape(shape = var_9218, x = mh_k_95_cast_fp16)[name = string("current_key_95_cast_fp16")]; + tensor var_9225_cast_fp16 = mul(x = var_101_cast_fp16_23, y = var_323_cast_fp16)[name = string("op_9225_cast_fp16")]; + tensor var_9226_cast_fp16 = mul(x = current_key_95_cast_fp16, y = var_321_cast_fp16)[name = string("op_9226_cast_fp16")]; + tensor key_141_cast_fp16 = add(x = var_9225_cast_fp16, y = var_9226_cast_fp16)[name = string("key_141_cast_fp16")]; + tensor var_9229_cast_fp16 = mul(x = var_132_cast_fp16_23, y = var_323_cast_fp16)[name = string("op_9229_cast_fp16")]; + tensor var_9230_cast_fp16 = mul(x = current_value_47_cast_fp16, y = var_321_cast_fp16)[name = string("op_9230_cast_fp16")]; + tensor value_93_cast_fp16 = add(x = var_9229_cast_fp16, y = var_9230_cast_fp16)[name = string("value_93_cast_fp16")]; + tensor var_9234 = const()[name = string("op_9234"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_93_cast_fp16 = reshape(shape = var_9234, x = key_141_cast_fp16)[name = string("key_heads_93_cast_fp16")]; + tensor var_9236 = const()[name = string("op_9236"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_93_cast_fp16 = reshape(shape = var_9236, x = value_93_cast_fp16)[name = string("value_heads_93_cast_fp16")]; + tensor var_9239_begin_0 = const()[name = string("op_9239_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9239_end_0 = const()[name = string("op_9239_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9239_end_mask_0 = const()[name = string("op_9239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9239_cast_fp16 = slice_by_index(begin = var_9239_begin_0, end = var_9239_end_0, end_mask = var_9239_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9239_cast_fp16")]; + tensor var_9243_begin_0 = const()[name = string("op_9243_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9243_end_0 = const()[name = string("op_9243_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9243_end_mask_0 = const()[name = string("op_9243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9243_cast_fp16 = slice_by_index(begin = var_9243_begin_0, end = var_9243_end_0, end_mask = var_9243_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9243_cast_fp16")]; + tensor var_9255_begin_0 = const()[name = string("op_9255_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_9255_end_0 = const()[name = string("op_9255_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_9255_end_mask_0 = const()[name = string("op_9255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9255_cast_fp16 = slice_by_index(begin = var_9255_begin_0, end = var_9255_end_0, end_mask = var_9255_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9255_cast_fp16")]; + tensor var_9259_begin_0 = const()[name = string("op_9259_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_9259_end_0 = const()[name = string("op_9259_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_9259_end_mask_0 = const()[name = string("op_9259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9259_cast_fp16 = slice_by_index(begin = var_9259_begin_0, end = var_9259_end_0, end_mask = var_9259_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9259_cast_fp16")]; + tensor var_9271_begin_0 = const()[name = string("op_9271_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_9271_end_0 = const()[name = string("op_9271_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_9271_end_mask_0 = const()[name = string("op_9271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9271_cast_fp16 = slice_by_index(begin = var_9271_begin_0, end = var_9271_end_0, end_mask = var_9271_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9271_cast_fp16")]; + tensor var_9275_begin_0 = const()[name = string("op_9275_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_9275_end_0 = const()[name = string("op_9275_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_9275_end_mask_0 = const()[name = string("op_9275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9275_cast_fp16 = slice_by_index(begin = var_9275_begin_0, end = var_9275_end_0, end_mask = var_9275_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9275_cast_fp16")]; + tensor var_9287_begin_0 = const()[name = string("op_9287_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_9287_end_0 = const()[name = string("op_9287_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_9287_end_mask_0 = const()[name = string("op_9287_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9287_cast_fp16 = slice_by_index(begin = var_9287_begin_0, end = var_9287_end_0, end_mask = var_9287_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9287_cast_fp16")]; + tensor var_9291_begin_0 = const()[name = string("op_9291_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_9291_end_0 = const()[name = string("op_9291_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_9291_end_mask_0 = const()[name = string("op_9291_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9291_cast_fp16 = slice_by_index(begin = var_9291_begin_0, end = var_9291_end_0, end_mask = var_9291_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9291_cast_fp16")]; + tensor var_9303_begin_0 = const()[name = string("op_9303_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_9303_end_0 = const()[name = string("op_9303_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_9303_end_mask_0 = const()[name = string("op_9303_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9303_cast_fp16 = slice_by_index(begin = var_9303_begin_0, end = var_9303_end_0, end_mask = var_9303_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9303_cast_fp16")]; + tensor var_9307_begin_0 = const()[name = string("op_9307_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_9307_end_0 = const()[name = string("op_9307_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_9307_end_mask_0 = const()[name = string("op_9307_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9307_cast_fp16 = slice_by_index(begin = var_9307_begin_0, end = var_9307_end_0, end_mask = var_9307_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9307_cast_fp16")]; + tensor var_9319_begin_0 = const()[name = string("op_9319_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_9319_end_0 = const()[name = string("op_9319_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_9319_end_mask_0 = const()[name = string("op_9319_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9319_cast_fp16 = slice_by_index(begin = var_9319_begin_0, end = var_9319_end_0, end_mask = var_9319_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9319_cast_fp16")]; + tensor var_9323_begin_0 = const()[name = string("op_9323_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_9323_end_0 = const()[name = string("op_9323_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_9323_end_mask_0 = const()[name = string("op_9323_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9323_cast_fp16 = slice_by_index(begin = var_9323_begin_0, end = var_9323_end_0, end_mask = var_9323_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9323_cast_fp16")]; + tensor var_9335_begin_0 = const()[name = string("op_9335_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_9335_end_0 = const()[name = string("op_9335_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_9335_end_mask_0 = const()[name = string("op_9335_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9335_cast_fp16 = slice_by_index(begin = var_9335_begin_0, end = var_9335_end_0, end_mask = var_9335_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9335_cast_fp16")]; + tensor var_9339_begin_0 = const()[name = string("op_9339_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_9339_end_0 = const()[name = string("op_9339_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_9339_end_mask_0 = const()[name = string("op_9339_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9339_cast_fp16 = slice_by_index(begin = var_9339_begin_0, end = var_9339_end_0, end_mask = var_9339_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9339_cast_fp16")]; + tensor var_9351_begin_0 = const()[name = string("op_9351_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_9351_end_0 = const()[name = string("op_9351_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9351_end_mask_0 = const()[name = string("op_9351_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9351_cast_fp16 = slice_by_index(begin = var_9351_begin_0, end = var_9351_end_0, end_mask = var_9351_end_mask_0, x = key_heads_93_cast_fp16)[name = string("op_9351_cast_fp16")]; + tensor var_9355_begin_0 = const()[name = string("op_9355_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_9355_end_0 = const()[name = string("op_9355_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9355_end_mask_0 = const()[name = string("op_9355_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9355_cast_fp16 = slice_by_index(begin = var_9355_begin_0, end = var_9355_end_0, end_mask = var_9355_end_mask_0, x = value_heads_93_cast_fp16)[name = string("op_9355_cast_fp16")]; + bool key_heads_95_interleave_0 = const()[name = string("key_heads_95_interleave_0"), val = bool(false)]; + tensor key_heads_95_cast_fp16 = concat(axis = var_9081, interleave = key_heads_95_interleave_0, values = (var_9239_cast_fp16, var_9239_cast_fp16, var_9255_cast_fp16, var_9255_cast_fp16, var_9271_cast_fp16, var_9271_cast_fp16, var_9287_cast_fp16, var_9287_cast_fp16, var_9303_cast_fp16, var_9303_cast_fp16, var_9319_cast_fp16, var_9319_cast_fp16, var_9335_cast_fp16, var_9335_cast_fp16, var_9351_cast_fp16, var_9351_cast_fp16))[name = string("key_heads_95_cast_fp16")]; + bool value_heads_95_interleave_0 = const()[name = string("value_heads_95_interleave_0"), val = bool(false)]; + tensor value_heads_95_cast_fp16 = concat(axis = var_9081, interleave = value_heads_95_interleave_0, values = (var_9243_cast_fp16, var_9243_cast_fp16, var_9259_cast_fp16, var_9259_cast_fp16, var_9275_cast_fp16, var_9275_cast_fp16, var_9291_cast_fp16, var_9291_cast_fp16, var_9307_cast_fp16, var_9307_cast_fp16, var_9323_cast_fp16, var_9323_cast_fp16, var_9339_cast_fp16, var_9339_cast_fp16, var_9355_cast_fp16, var_9355_cast_fp16))[name = string("value_heads_95_cast_fp16")]; + fp16 var_9378_to_fp16 = const()[name = string("op_9378_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_9379_cast_fp16 = mul(x = mh_q_141_cast_fp16, y = var_9378_to_fp16)[name = string("op_9379_cast_fp16")]; + bool mh_w_93_transpose_x_0 = const()[name = string("mh_w_93_transpose_x_0"), val = bool(true)]; + bool mh_w_93_transpose_y_0 = const()[name = string("mh_w_93_transpose_y_0"), val = bool(false)]; + tensor mh_w_93_cast_fp16 = matmul(transpose_x = mh_w_93_transpose_x_0, transpose_y = mh_w_93_transpose_y_0, x = var_9379_cast_fp16, y = key_heads_95_cast_fp16)[name = string("mh_w_93_cast_fp16")]; + tensor mh_w_95_cast_fp16 = add(x = mh_w_93_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_95_cast_fp16")]; + tensor var_9391_cast_fp16 = softmax(axis = var_9063, x = mh_w_95_cast_fp16)[name = string("op_9391_cast_fp16")]; + bool attn_47_transpose_x_0 = const()[name = string("attn_47_transpose_x_0"), val = bool(false)]; + bool attn_47_transpose_y_0 = const()[name = string("attn_47_transpose_y_0"), val = bool(true)]; + tensor attn_47_cast_fp16 = matmul(transpose_x = attn_47_transpose_x_0, transpose_y = attn_47_transpose_y_0, x = value_heads_95_cast_fp16, y = var_9391_cast_fp16)[name = string("attn_47_cast_fp16")]; + tensor var_9396 = const()[name = string("op_9396"), val = tensor([1, -1, 1, 1])]; + tensor input_185_cast_fp16 = reshape(shape = var_9396, x = attn_47_cast_fp16)[name = string("input_185_cast_fp16")]; + string obj_195_pad_type_0 = const()[name = string("obj_195_pad_type_0"), val = string("valid")]; + tensor obj_195_strides_0 = const()[name = string("obj_195_strides_0"), val = tensor([1, 1])]; + tensor obj_195_pad_0 = const()[name = string("obj_195_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_195_dilations_0 = const()[name = string("obj_195_dilations_0"), val = tensor([1, 1])]; + int32 obj_195_groups_0 = const()[name = string("obj_195_groups_0"), val = int32(1)]; + tensor layers_23_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366246144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368343360))))[name = string("layers_23_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_195_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_195_dilations_0, groups = obj_195_groups_0, pad = obj_195_pad_0, pad_type = obj_195_pad_type_0, strides = obj_195_strides_0, weight = layers_23_self_attn_o_proj_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = string("obj_195_cast_fp16")]; + tensor inputs_189_cast_fp16 = add(x = inputs_183_cast_fp16, y = obj_195_cast_fp16)[name = string("inputs_189_cast_fp16")]; + tensor inputs_sq_191_cast_fp16 = mul(x = inputs_189_cast_fp16, y = inputs_189_cast_fp16)[name = string("inputs_sq_191_cast_fp16")]; + tensor variance_191_axes_0 = const()[name = string("variance_191_axes_0"), val = tensor([1])]; + bool variance_191_keep_dims_0 = const()[name = string("variance_191_keep_dims_0"), val = bool(true)]; + tensor variance_191_cast_fp16 = reduce_mean(axes = variance_191_axes_0, keep_dims = variance_191_keep_dims_0, x = inputs_sq_191_cast_fp16)[name = string("variance_191_cast_fp16")]; + fp16 var_9414_to_fp16 = const()[name = string("op_9414_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9415_cast_fp16 = add(x = variance_191_cast_fp16, y = var_9414_to_fp16)[name = string("op_9415_cast_fp16")]; + fp32 var_9416_epsilon_0 = const()[name = string("op_9416_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9416_cast_fp16 = rsqrt(epsilon = var_9416_epsilon_0, x = var_9415_cast_fp16)[name = string("op_9416_cast_fp16")]; + tensor hidden_states_237_cast_fp16 = mul(x = inputs_189_cast_fp16, y = var_9416_cast_fp16)[name = string("hidden_states_237_cast_fp16")]; + tensor w_191_to_fp16 = const()[name = string("w_191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368343936)))]; + tensor input_187_cast_fp16 = mul(x = w_191_to_fp16, y = hidden_states_237_cast_fp16)[name = string("input_187_cast_fp16")]; + string input_189_pad_type_0 = const()[name = string("input_189_pad_type_0"), val = string("valid")]; + tensor input_189_strides_0 = const()[name = string("input_189_strides_0"), val = tensor([1, 1])]; + tensor input_189_pad_0 = const()[name = string("input_189_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_189_dilations_0 = const()[name = string("input_189_dilations_0"), val = tensor([1, 1])]; + int32 input_189_groups_0 = const()[name = string("input_189_groups_0"), val = int32(1)]; + tensor layers_23_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368346048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371491840))))[name = string("layers_23_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_189_cast_fp16 = conv(dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = layers_23_mlp_gate_proj_weight_to_fp16_palettized, x = input_187_cast_fp16)[name = string("input_189_cast_fp16")]; + tensor var_9430_cast_fp16 = silu(x = input_189_cast_fp16)[name = string("op_9430_cast_fp16")]; + string var_9436_pad_type_0 = const()[name = string("op_9436_pad_type_0"), val = string("valid")]; + tensor var_9436_strides_0 = const()[name = string("op_9436_strides_0"), val = tensor([1, 1])]; + tensor var_9436_pad_0 = const()[name = string("op_9436_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9436_dilations_0 = const()[name = string("op_9436_dilations_0"), val = tensor([1, 1])]; + int32 var_9436_groups_0 = const()[name = string("op_9436_groups_0"), val = int32(1)]; + tensor layers_23_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371492416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374638208))))[name = string("layers_23_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_9436_cast_fp16 = conv(dilations = var_9436_dilations_0, groups = var_9436_groups_0, pad = var_9436_pad_0, pad_type = var_9436_pad_type_0, strides = var_9436_strides_0, weight = layers_23_mlp_up_proj_weight_to_fp16_palettized, x = input_187_cast_fp16)[name = string("op_9436_cast_fp16")]; + tensor input_191_cast_fp16 = mul(x = var_9430_cast_fp16, y = var_9436_cast_fp16)[name = string("input_191_cast_fp16")]; + string hidden_states_239_pad_type_0 = const()[name = string("hidden_states_239_pad_type_0"), val = string("valid")]; + tensor hidden_states_239_strides_0 = const()[name = string("hidden_states_239_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_239_pad_0 = const()[name = string("hidden_states_239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_239_dilations_0 = const()[name = string("hidden_states_239_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_239_groups_0 = const()[name = string("hidden_states_239_groups_0"), val = int32(1)]; + tensor layers_23_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374638784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377784576))))[name = string("layers_23_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_239_cast_fp16 = conv(dilations = hidden_states_239_dilations_0, groups = hidden_states_239_groups_0, pad = hidden_states_239_pad_0, pad_type = hidden_states_239_pad_type_0, strides = hidden_states_239_strides_0, weight = layers_23_mlp_down_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + tensor inputs_191_cast_fp16 = add(x = inputs_189_cast_fp16, y = hidden_states_239_cast_fp16)[name = string("inputs_191_cast_fp16")]; + int32 var_9450 = const()[name = string("op_9450"), val = int32(3)]; + int32 var_9460 = const()[name = string("op_9460"), val = int32(-2)]; + int32 var_9468 = const()[name = string("op_9468"), val = int32(1)]; + tensor inputs_sq_193_cast_fp16 = mul(x = inputs_191_cast_fp16, y = inputs_191_cast_fp16)[name = string("inputs_sq_193_cast_fp16")]; + tensor variance_193_axes_0 = const()[name = string("variance_193_axes_0"), val = tensor([1])]; + bool variance_193_keep_dims_0 = const()[name = string("variance_193_keep_dims_0"), val = bool(true)]; + tensor variance_193_cast_fp16 = reduce_mean(axes = variance_193_axes_0, keep_dims = variance_193_keep_dims_0, x = inputs_sq_193_cast_fp16)[name = string("variance_193_cast_fp16")]; + fp16 var_9480_to_fp16 = const()[name = string("op_9480_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9481_cast_fp16 = add(x = variance_193_cast_fp16, y = var_9480_to_fp16)[name = string("op_9481_cast_fp16")]; + fp32 var_9482_epsilon_0 = const()[name = string("op_9482_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9482_cast_fp16 = rsqrt(epsilon = var_9482_epsilon_0, x = var_9481_cast_fp16)[name = string("op_9482_cast_fp16")]; + tensor hidden_states_241_cast_fp16 = mul(x = inputs_191_cast_fp16, y = var_9482_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; + tensor w_193_to_fp16 = const()[name = string("w_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377785152)))]; + tensor obj_197_cast_fp16 = mul(x = w_193_to_fp16, y = hidden_states_241_cast_fp16)[name = string("obj_197_cast_fp16")]; + string query_145_pad_type_0 = const()[name = string("query_145_pad_type_0"), val = string("valid")]; + tensor query_145_strides_0 = const()[name = string("query_145_strides_0"), val = tensor([1, 1])]; + tensor query_145_pad_0 = const()[name = string("query_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_145_dilations_0 = const()[name = string("query_145_dilations_0"), val = tensor([1, 1])]; + int32 query_145_groups_0 = const()[name = string("query_145_groups_0"), val = int32(1)]; + tensor layers_24_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377787264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379884480))))[name = string("layers_24_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_145_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_145_dilations_0, groups = query_145_groups_0, pad = query_145_pad_0, pad_type = query_145_pad_type_0, strides = query_145_strides_0, weight = layers_24_self_attn_q_proj_weight_to_fp16_palettized, x = obj_197_cast_fp16)[name = string("query_145_cast_fp16")]; + string current_key_97_pad_type_0 = const()[name = string("current_key_97_pad_type_0"), val = string("valid")]; + tensor current_key_97_strides_0 = const()[name = string("current_key_97_strides_0"), val = tensor([1, 1])]; + tensor current_key_97_pad_0 = const()[name = string("current_key_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_97_dilations_0 = const()[name = string("current_key_97_dilations_0"), val = tensor([1, 1])]; + int32 current_key_97_groups_0 = const()[name = string("current_key_97_groups_0"), val = int32(1)]; + tensor layers_24_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379885056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380933696))))[name = string("layers_24_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_97_cast_fp16 = conv(dilations = current_key_97_dilations_0, groups = current_key_97_groups_0, pad = current_key_97_pad_0, pad_type = current_key_97_pad_type_0, strides = current_key_97_strides_0, weight = layers_24_self_attn_k_proj_weight_to_fp16_palettized, x = obj_197_cast_fp16)[name = string("current_key_97_cast_fp16")]; + string current_value_49_pad_type_0 = const()[name = string("current_value_49_pad_type_0"), val = string("valid")]; + tensor current_value_49_strides_0 = const()[name = string("current_value_49_strides_0"), val = tensor([1, 1])]; + tensor current_value_49_pad_0 = const()[name = string("current_value_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_49_dilations_0 = const()[name = string("current_value_49_dilations_0"), val = tensor([1, 1])]; + int32 current_value_49_groups_0 = const()[name = string("current_value_49_groups_0"), val = int32(1)]; + tensor layers_24_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380934272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381982912))))[name = string("layers_24_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_49_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_49_dilations_0, groups = current_value_49_groups_0, pad = current_value_49_pad_0, pad_type = current_value_49_pad_type_0, strides = current_value_49_strides_0, weight = layers_24_self_attn_v_proj_weight_to_fp16_palettized, x = obj_197_cast_fp16)[name = string("current_value_49_cast_fp16")]; + tensor var_9519 = const()[name = string("op_9519"), val = tensor([16, 128, 1, 1])]; + tensor inputs_193_cast_fp16 = reshape(shape = var_9519, x = query_145_cast_fp16)[name = string("inputs_193_cast_fp16")]; + tensor inputs_sq_195_cast_fp16 = mul(x = inputs_193_cast_fp16, y = inputs_193_cast_fp16)[name = string("inputs_sq_195_cast_fp16")]; + tensor variance_195_axes_0 = const()[name = string("variance_195_axes_0"), val = tensor([1])]; + bool variance_195_keep_dims_0 = const()[name = string("variance_195_keep_dims_0"), val = bool(true)]; + tensor variance_195_cast_fp16 = reduce_mean(axes = variance_195_axes_0, keep_dims = variance_195_keep_dims_0, x = inputs_sq_195_cast_fp16)[name = string("variance_195_cast_fp16")]; + fp16 var_9525_to_fp16 = const()[name = string("op_9525_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9526_cast_fp16 = add(x = variance_195_cast_fp16, y = var_9525_to_fp16)[name = string("op_9526_cast_fp16")]; + fp32 var_9527_epsilon_0 = const()[name = string("op_9527_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9527_cast_fp16 = rsqrt(epsilon = var_9527_epsilon_0, x = var_9526_cast_fp16)[name = string("op_9527_cast_fp16")]; + tensor hidden_states_243_cast_fp16 = mul(x = inputs_193_cast_fp16, y = var_9527_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor w_195_to_fp16 = const()[name = string("w_195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381983488)))]; + tensor query_normed_49_cast_fp16 = mul(x = w_195_to_fp16, y = hidden_states_243_cast_fp16)[name = string("query_normed_49_cast_fp16")]; + tensor var_9535 = const()[name = string("op_9535"), val = tensor([8, 128, 1, 1])]; + tensor inputs_195_cast_fp16 = reshape(shape = var_9535, x = current_key_97_cast_fp16)[name = string("inputs_195_cast_fp16")]; + tensor inputs_sq_197_cast_fp16 = mul(x = inputs_195_cast_fp16, y = inputs_195_cast_fp16)[name = string("inputs_sq_197_cast_fp16")]; + tensor variance_197_axes_0 = const()[name = string("variance_197_axes_0"), val = tensor([1])]; + bool variance_197_keep_dims_0 = const()[name = string("variance_197_keep_dims_0"), val = bool(true)]; + tensor variance_197_cast_fp16 = reduce_mean(axes = variance_197_axes_0, keep_dims = variance_197_keep_dims_0, x = inputs_sq_197_cast_fp16)[name = string("variance_197_cast_fp16")]; + fp16 var_9541_to_fp16 = const()[name = string("op_9541_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9542_cast_fp16 = add(x = variance_197_cast_fp16, y = var_9541_to_fp16)[name = string("op_9542_cast_fp16")]; + fp32 var_9543_epsilon_0 = const()[name = string("op_9543_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9543_cast_fp16 = rsqrt(epsilon = var_9543_epsilon_0, x = var_9542_cast_fp16)[name = string("op_9543_cast_fp16")]; + tensor hidden_states_245_cast_fp16 = mul(x = inputs_195_cast_fp16, y = var_9543_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; + tensor w_197_to_fp16 = const()[name = string("w_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381983808)))]; + tensor current_key_normed_49_cast_fp16 = mul(x = w_197_to_fp16, y = hidden_states_245_cast_fp16)[name = string("current_key_normed_49_cast_fp16")]; + tensor var_9561 = const()[name = string("op_9561"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_145_cast_fp16 = reshape(shape = var_9561, x = query_normed_49_cast_fp16)[name = string("mh_q_145_cast_fp16")]; + tensor var_9563 = const()[name = string("op_9563"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_97_cast_fp16 = reshape(shape = var_9563, x = current_key_normed_49_cast_fp16)[name = string("mh_k_97_cast_fp16")]; + tensor var_9567_cast_fp16 = mul(x = mh_q_145_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9567_cast_fp16")]; + tensor var_9572_begin_0 = const()[name = string("op_9572_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9572_end_0 = const()[name = string("op_9572_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_9572_end_mask_0 = const()[name = string("op_9572_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_9572_cast_fp16 = slice_by_index(begin = var_9572_begin_0, end = var_9572_end_0, end_mask = var_9572_end_mask_0, x = mh_q_145_cast_fp16)[name = string("op_9572_cast_fp16")]; + tensor var_9578_begin_0 = const()[name = string("op_9578_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9578_end_0 = const()[name = string("op_9578_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_9578_end_mask_0 = const()[name = string("op_9578_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9578_cast_fp16 = slice_by_index(begin = var_9578_begin_0, end = var_9578_end_0, end_mask = var_9578_end_mask_0, x = mh_q_145_cast_fp16)[name = string("op_9578_cast_fp16")]; + fp16 const_569_promoted_to_fp16 = const()[name = string("const_569_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9580_cast_fp16 = mul(x = var_9578_cast_fp16, y = const_569_promoted_to_fp16)[name = string("op_9580_cast_fp16")]; + bool var_9582_interleave_0 = const()[name = string("op_9582_interleave_0"), val = bool(false)]; + tensor var_9582_cast_fp16 = concat(axis = var_9460, interleave = var_9582_interleave_0, values = (var_9580_cast_fp16, var_9572_cast_fp16))[name = string("op_9582_cast_fp16")]; + tensor var_9583_cast_fp16 = mul(x = var_9582_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9583_cast_fp16")]; + tensor mh_q_147_cast_fp16 = add(x = var_9567_cast_fp16, y = var_9583_cast_fp16)[name = string("mh_q_147_cast_fp16")]; + tensor var_9585_cast_fp16 = mul(x = mh_k_97_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9585_cast_fp16")]; + tensor var_9590_begin_0 = const()[name = string("op_9590_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9590_end_0 = const()[name = string("op_9590_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_9590_end_mask_0 = const()[name = string("op_9590_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_9590_cast_fp16 = slice_by_index(begin = var_9590_begin_0, end = var_9590_end_0, end_mask = var_9590_end_mask_0, x = mh_k_97_cast_fp16)[name = string("op_9590_cast_fp16")]; + tensor var_9596_begin_0 = const()[name = string("op_9596_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9596_end_0 = const()[name = string("op_9596_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_9596_end_mask_0 = const()[name = string("op_9596_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9596_cast_fp16 = slice_by_index(begin = var_9596_begin_0, end = var_9596_end_0, end_mask = var_9596_end_mask_0, x = mh_k_97_cast_fp16)[name = string("op_9596_cast_fp16")]; + fp16 const_572_promoted_to_fp16 = const()[name = string("const_572_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9598_cast_fp16 = mul(x = var_9596_cast_fp16, y = const_572_promoted_to_fp16)[name = string("op_9598_cast_fp16")]; + bool var_9600_interleave_0 = const()[name = string("op_9600_interleave_0"), val = bool(false)]; + tensor var_9600_cast_fp16 = concat(axis = var_9460, interleave = var_9600_interleave_0, values = (var_9598_cast_fp16, var_9590_cast_fp16))[name = string("op_9600_cast_fp16")]; + tensor var_9601_cast_fp16 = mul(x = var_9600_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9601_cast_fp16")]; + tensor mh_k_99_cast_fp16 = add(x = var_9585_cast_fp16, y = var_9601_cast_fp16)[name = string("mh_k_99_cast_fp16")]; + tensor var_9605 = const()[name = string("op_9605"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_99_cast_fp16 = reshape(shape = var_9605, x = mh_k_99_cast_fp16)[name = string("current_key_99_cast_fp16")]; + tensor var_9612_cast_fp16 = mul(x = var_101_cast_fp16_24, y = var_323_cast_fp16)[name = string("op_9612_cast_fp16")]; + tensor var_9613_cast_fp16 = mul(x = current_key_99_cast_fp16, y = var_321_cast_fp16)[name = string("op_9613_cast_fp16")]; + tensor key_147_cast_fp16 = add(x = var_9612_cast_fp16, y = var_9613_cast_fp16)[name = string("key_147_cast_fp16")]; + tensor var_9616_cast_fp16 = mul(x = var_132_cast_fp16_24, y = var_323_cast_fp16)[name = string("op_9616_cast_fp16")]; + tensor var_9617_cast_fp16 = mul(x = current_value_49_cast_fp16, y = var_321_cast_fp16)[name = string("op_9617_cast_fp16")]; + tensor value_97_cast_fp16 = add(x = var_9616_cast_fp16, y = var_9617_cast_fp16)[name = string("value_97_cast_fp16")]; + tensor var_9621 = const()[name = string("op_9621"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_97_cast_fp16 = reshape(shape = var_9621, x = key_147_cast_fp16)[name = string("key_heads_97_cast_fp16")]; + tensor var_9623 = const()[name = string("op_9623"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_97_cast_fp16 = reshape(shape = var_9623, x = value_97_cast_fp16)[name = string("value_heads_97_cast_fp16")]; + tensor var_9626_begin_0 = const()[name = string("op_9626_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9626_end_0 = const()[name = string("op_9626_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9626_end_mask_0 = const()[name = string("op_9626_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9626_cast_fp16 = slice_by_index(begin = var_9626_begin_0, end = var_9626_end_0, end_mask = var_9626_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9626_cast_fp16")]; + tensor var_9630_begin_0 = const()[name = string("op_9630_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9630_end_0 = const()[name = string("op_9630_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9630_end_mask_0 = const()[name = string("op_9630_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9630_cast_fp16 = slice_by_index(begin = var_9630_begin_0, end = var_9630_end_0, end_mask = var_9630_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9630_cast_fp16")]; + tensor var_9642_begin_0 = const()[name = string("op_9642_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_9642_end_0 = const()[name = string("op_9642_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_9642_end_mask_0 = const()[name = string("op_9642_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9642_cast_fp16 = slice_by_index(begin = var_9642_begin_0, end = var_9642_end_0, end_mask = var_9642_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9642_cast_fp16")]; + tensor var_9646_begin_0 = const()[name = string("op_9646_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_9646_end_0 = const()[name = string("op_9646_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_9646_end_mask_0 = const()[name = string("op_9646_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9646_cast_fp16 = slice_by_index(begin = var_9646_begin_0, end = var_9646_end_0, end_mask = var_9646_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9646_cast_fp16")]; + tensor var_9658_begin_0 = const()[name = string("op_9658_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_9658_end_0 = const()[name = string("op_9658_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_9658_end_mask_0 = const()[name = string("op_9658_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9658_cast_fp16 = slice_by_index(begin = var_9658_begin_0, end = var_9658_end_0, end_mask = var_9658_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9658_cast_fp16")]; + tensor var_9662_begin_0 = const()[name = string("op_9662_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_9662_end_0 = const()[name = string("op_9662_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_9662_end_mask_0 = const()[name = string("op_9662_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9662_cast_fp16 = slice_by_index(begin = var_9662_begin_0, end = var_9662_end_0, end_mask = var_9662_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9662_cast_fp16")]; + tensor var_9674_begin_0 = const()[name = string("op_9674_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_9674_end_0 = const()[name = string("op_9674_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_9674_end_mask_0 = const()[name = string("op_9674_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9674_cast_fp16 = slice_by_index(begin = var_9674_begin_0, end = var_9674_end_0, end_mask = var_9674_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9674_cast_fp16")]; + tensor var_9678_begin_0 = const()[name = string("op_9678_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_9678_end_0 = const()[name = string("op_9678_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_9678_end_mask_0 = const()[name = string("op_9678_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9678_cast_fp16 = slice_by_index(begin = var_9678_begin_0, end = var_9678_end_0, end_mask = var_9678_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9678_cast_fp16")]; + tensor var_9690_begin_0 = const()[name = string("op_9690_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_9690_end_0 = const()[name = string("op_9690_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_9690_end_mask_0 = const()[name = string("op_9690_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9690_cast_fp16 = slice_by_index(begin = var_9690_begin_0, end = var_9690_end_0, end_mask = var_9690_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9690_cast_fp16")]; + tensor var_9694_begin_0 = const()[name = string("op_9694_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_9694_end_0 = const()[name = string("op_9694_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_9694_end_mask_0 = const()[name = string("op_9694_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9694_cast_fp16 = slice_by_index(begin = var_9694_begin_0, end = var_9694_end_0, end_mask = var_9694_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9694_cast_fp16")]; + tensor var_9706_begin_0 = const()[name = string("op_9706_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_9706_end_0 = const()[name = string("op_9706_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_9706_end_mask_0 = const()[name = string("op_9706_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9706_cast_fp16 = slice_by_index(begin = var_9706_begin_0, end = var_9706_end_0, end_mask = var_9706_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9706_cast_fp16")]; + tensor var_9710_begin_0 = const()[name = string("op_9710_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_9710_end_0 = const()[name = string("op_9710_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_9710_end_mask_0 = const()[name = string("op_9710_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9710_cast_fp16 = slice_by_index(begin = var_9710_begin_0, end = var_9710_end_0, end_mask = var_9710_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9710_cast_fp16")]; + tensor var_9722_begin_0 = const()[name = string("op_9722_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_9722_end_0 = const()[name = string("op_9722_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_9722_end_mask_0 = const()[name = string("op_9722_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9722_cast_fp16 = slice_by_index(begin = var_9722_begin_0, end = var_9722_end_0, end_mask = var_9722_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9722_cast_fp16")]; + tensor var_9726_begin_0 = const()[name = string("op_9726_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_9726_end_0 = const()[name = string("op_9726_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_9726_end_mask_0 = const()[name = string("op_9726_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9726_cast_fp16 = slice_by_index(begin = var_9726_begin_0, end = var_9726_end_0, end_mask = var_9726_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9726_cast_fp16")]; + tensor var_9738_begin_0 = const()[name = string("op_9738_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_9738_end_0 = const()[name = string("op_9738_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9738_end_mask_0 = const()[name = string("op_9738_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9738_cast_fp16 = slice_by_index(begin = var_9738_begin_0, end = var_9738_end_0, end_mask = var_9738_end_mask_0, x = key_heads_97_cast_fp16)[name = string("op_9738_cast_fp16")]; + tensor var_9742_begin_0 = const()[name = string("op_9742_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_9742_end_0 = const()[name = string("op_9742_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_9742_end_mask_0 = const()[name = string("op_9742_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9742_cast_fp16 = slice_by_index(begin = var_9742_begin_0, end = var_9742_end_0, end_mask = var_9742_end_mask_0, x = value_heads_97_cast_fp16)[name = string("op_9742_cast_fp16")]; + bool key_heads_99_interleave_0 = const()[name = string("key_heads_99_interleave_0"), val = bool(false)]; + tensor key_heads_99_cast_fp16 = concat(axis = var_9468, interleave = key_heads_99_interleave_0, values = (var_9626_cast_fp16, var_9626_cast_fp16, var_9642_cast_fp16, var_9642_cast_fp16, var_9658_cast_fp16, var_9658_cast_fp16, var_9674_cast_fp16, var_9674_cast_fp16, var_9690_cast_fp16, var_9690_cast_fp16, var_9706_cast_fp16, var_9706_cast_fp16, var_9722_cast_fp16, var_9722_cast_fp16, var_9738_cast_fp16, var_9738_cast_fp16))[name = string("key_heads_99_cast_fp16")]; + bool value_heads_99_interleave_0 = const()[name = string("value_heads_99_interleave_0"), val = bool(false)]; + tensor value_heads_99_cast_fp16 = concat(axis = var_9468, interleave = value_heads_99_interleave_0, values = (var_9630_cast_fp16, var_9630_cast_fp16, var_9646_cast_fp16, var_9646_cast_fp16, var_9662_cast_fp16, var_9662_cast_fp16, var_9678_cast_fp16, var_9678_cast_fp16, var_9694_cast_fp16, var_9694_cast_fp16, var_9710_cast_fp16, var_9710_cast_fp16, var_9726_cast_fp16, var_9726_cast_fp16, var_9742_cast_fp16, var_9742_cast_fp16))[name = string("value_heads_99_cast_fp16")]; + fp16 var_9765_to_fp16 = const()[name = string("op_9765_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_9766_cast_fp16 = mul(x = mh_q_147_cast_fp16, y = var_9765_to_fp16)[name = string("op_9766_cast_fp16")]; + bool mh_w_97_transpose_x_0 = const()[name = string("mh_w_97_transpose_x_0"), val = bool(true)]; + bool mh_w_97_transpose_y_0 = const()[name = string("mh_w_97_transpose_y_0"), val = bool(false)]; + tensor mh_w_97_cast_fp16 = matmul(transpose_x = mh_w_97_transpose_x_0, transpose_y = mh_w_97_transpose_y_0, x = var_9766_cast_fp16, y = key_heads_99_cast_fp16)[name = string("mh_w_97_cast_fp16")]; + tensor mh_w_99_cast_fp16 = add(x = mh_w_97_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_99_cast_fp16")]; + tensor var_9778_cast_fp16 = softmax(axis = var_9450, x = mh_w_99_cast_fp16)[name = string("op_9778_cast_fp16")]; + bool attn_49_transpose_x_0 = const()[name = string("attn_49_transpose_x_0"), val = bool(false)]; + bool attn_49_transpose_y_0 = const()[name = string("attn_49_transpose_y_0"), val = bool(true)]; + tensor attn_49_cast_fp16 = matmul(transpose_x = attn_49_transpose_x_0, transpose_y = attn_49_transpose_y_0, x = value_heads_99_cast_fp16, y = var_9778_cast_fp16)[name = string("attn_49_cast_fp16")]; + tensor var_9783 = const()[name = string("op_9783"), val = tensor([1, -1, 1, 1])]; + tensor input_193_cast_fp16 = reshape(shape = var_9783, x = attn_49_cast_fp16)[name = string("input_193_cast_fp16")]; + string obj_203_pad_type_0 = const()[name = string("obj_203_pad_type_0"), val = string("valid")]; + tensor obj_203_strides_0 = const()[name = string("obj_203_strides_0"), val = tensor([1, 1])]; + tensor obj_203_pad_0 = const()[name = string("obj_203_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_203_dilations_0 = const()[name = string("obj_203_dilations_0"), val = tensor([1, 1])]; + int32 obj_203_groups_0 = const()[name = string("obj_203_groups_0"), val = int32(1)]; + tensor layers_24_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381984128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384081344))))[name = string("layers_24_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_203_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_203_dilations_0, groups = obj_203_groups_0, pad = obj_203_pad_0, pad_type = obj_203_pad_type_0, strides = obj_203_strides_0, weight = layers_24_self_attn_o_proj_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = string("obj_203_cast_fp16")]; + tensor inputs_197_cast_fp16 = add(x = inputs_191_cast_fp16, y = obj_203_cast_fp16)[name = string("inputs_197_cast_fp16")]; + tensor inputs_sq_199_cast_fp16 = mul(x = inputs_197_cast_fp16, y = inputs_197_cast_fp16)[name = string("inputs_sq_199_cast_fp16")]; + tensor variance_199_axes_0 = const()[name = string("variance_199_axes_0"), val = tensor([1])]; + bool variance_199_keep_dims_0 = const()[name = string("variance_199_keep_dims_0"), val = bool(true)]; + tensor variance_199_cast_fp16 = reduce_mean(axes = variance_199_axes_0, keep_dims = variance_199_keep_dims_0, x = inputs_sq_199_cast_fp16)[name = string("variance_199_cast_fp16")]; + fp16 var_9801_to_fp16 = const()[name = string("op_9801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9802_cast_fp16 = add(x = variance_199_cast_fp16, y = var_9801_to_fp16)[name = string("op_9802_cast_fp16")]; + fp32 var_9803_epsilon_0 = const()[name = string("op_9803_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9803_cast_fp16 = rsqrt(epsilon = var_9803_epsilon_0, x = var_9802_cast_fp16)[name = string("op_9803_cast_fp16")]; + tensor hidden_states_247_cast_fp16 = mul(x = inputs_197_cast_fp16, y = var_9803_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor w_199_to_fp16 = const()[name = string("w_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384081920)))]; + tensor input_195_cast_fp16 = mul(x = w_199_to_fp16, y = hidden_states_247_cast_fp16)[name = string("input_195_cast_fp16")]; + string input_197_pad_type_0 = const()[name = string("input_197_pad_type_0"), val = string("valid")]; + tensor input_197_strides_0 = const()[name = string("input_197_strides_0"), val = tensor([1, 1])]; + tensor input_197_pad_0 = const()[name = string("input_197_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_197_dilations_0 = const()[name = string("input_197_dilations_0"), val = tensor([1, 1])]; + int32 input_197_groups_0 = const()[name = string("input_197_groups_0"), val = int32(1)]; + tensor layers_24_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384084032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387229824))))[name = string("layers_24_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_197_cast_fp16 = conv(dilations = input_197_dilations_0, groups = input_197_groups_0, pad = input_197_pad_0, pad_type = input_197_pad_type_0, strides = input_197_strides_0, weight = layers_24_mlp_gate_proj_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("input_197_cast_fp16")]; + tensor var_9817_cast_fp16 = silu(x = input_197_cast_fp16)[name = string("op_9817_cast_fp16")]; + string var_9823_pad_type_0 = const()[name = string("op_9823_pad_type_0"), val = string("valid")]; + tensor var_9823_strides_0 = const()[name = string("op_9823_strides_0"), val = tensor([1, 1])]; + tensor var_9823_pad_0 = const()[name = string("op_9823_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9823_dilations_0 = const()[name = string("op_9823_dilations_0"), val = tensor([1, 1])]; + int32 var_9823_groups_0 = const()[name = string("op_9823_groups_0"), val = int32(1)]; + tensor layers_24_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387230400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390376192))))[name = string("layers_24_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_9823_cast_fp16 = conv(dilations = var_9823_dilations_0, groups = var_9823_groups_0, pad = var_9823_pad_0, pad_type = var_9823_pad_type_0, strides = var_9823_strides_0, weight = layers_24_mlp_up_proj_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("op_9823_cast_fp16")]; + tensor input_199_cast_fp16 = mul(x = var_9817_cast_fp16, y = var_9823_cast_fp16)[name = string("input_199_cast_fp16")]; + string hidden_states_249_pad_type_0 = const()[name = string("hidden_states_249_pad_type_0"), val = string("valid")]; + tensor hidden_states_249_strides_0 = const()[name = string("hidden_states_249_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_249_pad_0 = const()[name = string("hidden_states_249_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_249_dilations_0 = const()[name = string("hidden_states_249_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_249_groups_0 = const()[name = string("hidden_states_249_groups_0"), val = int32(1)]; + tensor layers_24_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390376768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393522560))))[name = string("layers_24_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_249_cast_fp16 = conv(dilations = hidden_states_249_dilations_0, groups = hidden_states_249_groups_0, pad = hidden_states_249_pad_0, pad_type = hidden_states_249_pad_type_0, strides = hidden_states_249_strides_0, weight = layers_24_mlp_down_proj_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = string("hidden_states_249_cast_fp16")]; + tensor inputs_199_cast_fp16 = add(x = inputs_197_cast_fp16, y = hidden_states_249_cast_fp16)[name = string("inputs_199_cast_fp16")]; + int32 var_9837 = const()[name = string("op_9837"), val = int32(3)]; + int32 var_9847 = const()[name = string("op_9847"), val = int32(-2)]; + int32 var_9855 = const()[name = string("op_9855"), val = int32(1)]; + tensor inputs_sq_201_cast_fp16 = mul(x = inputs_199_cast_fp16, y = inputs_199_cast_fp16)[name = string("inputs_sq_201_cast_fp16")]; + tensor variance_201_axes_0 = const()[name = string("variance_201_axes_0"), val = tensor([1])]; + bool variance_201_keep_dims_0 = const()[name = string("variance_201_keep_dims_0"), val = bool(true)]; + tensor variance_201_cast_fp16 = reduce_mean(axes = variance_201_axes_0, keep_dims = variance_201_keep_dims_0, x = inputs_sq_201_cast_fp16)[name = string("variance_201_cast_fp16")]; + fp16 var_9867_to_fp16 = const()[name = string("op_9867_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9868_cast_fp16 = add(x = variance_201_cast_fp16, y = var_9867_to_fp16)[name = string("op_9868_cast_fp16")]; + fp32 var_9869_epsilon_0 = const()[name = string("op_9869_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9869_cast_fp16 = rsqrt(epsilon = var_9869_epsilon_0, x = var_9868_cast_fp16)[name = string("op_9869_cast_fp16")]; + tensor hidden_states_251_cast_fp16 = mul(x = inputs_199_cast_fp16, y = var_9869_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; + tensor w_201_to_fp16 = const()[name = string("w_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393523136)))]; + tensor obj_205_cast_fp16 = mul(x = w_201_to_fp16, y = hidden_states_251_cast_fp16)[name = string("obj_205_cast_fp16")]; + string query_151_pad_type_0 = const()[name = string("query_151_pad_type_0"), val = string("valid")]; + tensor query_151_strides_0 = const()[name = string("query_151_strides_0"), val = tensor([1, 1])]; + tensor query_151_pad_0 = const()[name = string("query_151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_151_dilations_0 = const()[name = string("query_151_dilations_0"), val = tensor([1, 1])]; + int32 query_151_groups_0 = const()[name = string("query_151_groups_0"), val = int32(1)]; + tensor layers_25_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393525248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395622464))))[name = string("layers_25_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_151_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_151_dilations_0, groups = query_151_groups_0, pad = query_151_pad_0, pad_type = query_151_pad_type_0, strides = query_151_strides_0, weight = layers_25_self_attn_q_proj_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("query_151_cast_fp16")]; + string current_key_101_pad_type_0 = const()[name = string("current_key_101_pad_type_0"), val = string("valid")]; + tensor current_key_101_strides_0 = const()[name = string("current_key_101_strides_0"), val = tensor([1, 1])]; + tensor current_key_101_pad_0 = const()[name = string("current_key_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_101_dilations_0 = const()[name = string("current_key_101_dilations_0"), val = tensor([1, 1])]; + int32 current_key_101_groups_0 = const()[name = string("current_key_101_groups_0"), val = int32(1)]; + tensor layers_25_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395623040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396671680))))[name = string("layers_25_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_101_cast_fp16 = conv(dilations = current_key_101_dilations_0, groups = current_key_101_groups_0, pad = current_key_101_pad_0, pad_type = current_key_101_pad_type_0, strides = current_key_101_strides_0, weight = layers_25_self_attn_k_proj_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("current_key_101_cast_fp16")]; + string current_value_51_pad_type_0 = const()[name = string("current_value_51_pad_type_0"), val = string("valid")]; + tensor current_value_51_strides_0 = const()[name = string("current_value_51_strides_0"), val = tensor([1, 1])]; + tensor current_value_51_pad_0 = const()[name = string("current_value_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_51_dilations_0 = const()[name = string("current_value_51_dilations_0"), val = tensor([1, 1])]; + int32 current_value_51_groups_0 = const()[name = string("current_value_51_groups_0"), val = int32(1)]; + tensor layers_25_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396672256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397720896))))[name = string("layers_25_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_51_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_51_dilations_0, groups = current_value_51_groups_0, pad = current_value_51_pad_0, pad_type = current_value_51_pad_type_0, strides = current_value_51_strides_0, weight = layers_25_self_attn_v_proj_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("current_value_51_cast_fp16")]; + tensor var_9906 = const()[name = string("op_9906"), val = tensor([16, 128, 1, 1])]; + tensor inputs_201_cast_fp16 = reshape(shape = var_9906, x = query_151_cast_fp16)[name = string("inputs_201_cast_fp16")]; + tensor inputs_sq_203_cast_fp16 = mul(x = inputs_201_cast_fp16, y = inputs_201_cast_fp16)[name = string("inputs_sq_203_cast_fp16")]; + tensor variance_203_axes_0 = const()[name = string("variance_203_axes_0"), val = tensor([1])]; + bool variance_203_keep_dims_0 = const()[name = string("variance_203_keep_dims_0"), val = bool(true)]; + tensor variance_203_cast_fp16 = reduce_mean(axes = variance_203_axes_0, keep_dims = variance_203_keep_dims_0, x = inputs_sq_203_cast_fp16)[name = string("variance_203_cast_fp16")]; + fp16 var_9912_to_fp16 = const()[name = string("op_9912_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9913_cast_fp16 = add(x = variance_203_cast_fp16, y = var_9912_to_fp16)[name = string("op_9913_cast_fp16")]; + fp32 var_9914_epsilon_0 = const()[name = string("op_9914_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9914_cast_fp16 = rsqrt(epsilon = var_9914_epsilon_0, x = var_9913_cast_fp16)[name = string("op_9914_cast_fp16")]; + tensor hidden_states_253_cast_fp16 = mul(x = inputs_201_cast_fp16, y = var_9914_cast_fp16)[name = string("hidden_states_253_cast_fp16")]; + tensor w_203_to_fp16 = const()[name = string("w_203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397721472)))]; + tensor query_normed_51_cast_fp16 = mul(x = w_203_to_fp16, y = hidden_states_253_cast_fp16)[name = string("query_normed_51_cast_fp16")]; + tensor var_9922 = const()[name = string("op_9922"), val = tensor([8, 128, 1, 1])]; + tensor inputs_203_cast_fp16 = reshape(shape = var_9922, x = current_key_101_cast_fp16)[name = string("inputs_203_cast_fp16")]; + tensor inputs_sq_205_cast_fp16 = mul(x = inputs_203_cast_fp16, y = inputs_203_cast_fp16)[name = string("inputs_sq_205_cast_fp16")]; + tensor variance_205_axes_0 = const()[name = string("variance_205_axes_0"), val = tensor([1])]; + bool variance_205_keep_dims_0 = const()[name = string("variance_205_keep_dims_0"), val = bool(true)]; + tensor variance_205_cast_fp16 = reduce_mean(axes = variance_205_axes_0, keep_dims = variance_205_keep_dims_0, x = inputs_sq_205_cast_fp16)[name = string("variance_205_cast_fp16")]; + fp16 var_9928_to_fp16 = const()[name = string("op_9928_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_9929_cast_fp16 = add(x = variance_205_cast_fp16, y = var_9928_to_fp16)[name = string("op_9929_cast_fp16")]; + fp32 var_9930_epsilon_0 = const()[name = string("op_9930_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_9930_cast_fp16 = rsqrt(epsilon = var_9930_epsilon_0, x = var_9929_cast_fp16)[name = string("op_9930_cast_fp16")]; + tensor hidden_states_255_cast_fp16 = mul(x = inputs_203_cast_fp16, y = var_9930_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + tensor w_205_to_fp16 = const()[name = string("w_205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397721792)))]; + tensor current_key_normed_51_cast_fp16 = mul(x = w_205_to_fp16, y = hidden_states_255_cast_fp16)[name = string("current_key_normed_51_cast_fp16")]; + tensor var_9948 = const()[name = string("op_9948"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_151_cast_fp16 = reshape(shape = var_9948, x = query_normed_51_cast_fp16)[name = string("mh_q_151_cast_fp16")]; + tensor var_9950 = const()[name = string("op_9950"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_101_cast_fp16 = reshape(shape = var_9950, x = current_key_normed_51_cast_fp16)[name = string("mh_k_101_cast_fp16")]; + tensor var_9954_cast_fp16 = mul(x = mh_q_151_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9954_cast_fp16")]; + tensor var_9959_begin_0 = const()[name = string("op_9959_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9959_end_0 = const()[name = string("op_9959_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_9959_end_mask_0 = const()[name = string("op_9959_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_9959_cast_fp16 = slice_by_index(begin = var_9959_begin_0, end = var_9959_end_0, end_mask = var_9959_end_mask_0, x = mh_q_151_cast_fp16)[name = string("op_9959_cast_fp16")]; + tensor var_9965_begin_0 = const()[name = string("op_9965_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9965_end_0 = const()[name = string("op_9965_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_9965_end_mask_0 = const()[name = string("op_9965_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9965_cast_fp16 = slice_by_index(begin = var_9965_begin_0, end = var_9965_end_0, end_mask = var_9965_end_mask_0, x = mh_q_151_cast_fp16)[name = string("op_9965_cast_fp16")]; + fp16 const_592_promoted_to_fp16 = const()[name = string("const_592_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9967_cast_fp16 = mul(x = var_9965_cast_fp16, y = const_592_promoted_to_fp16)[name = string("op_9967_cast_fp16")]; + bool var_9969_interleave_0 = const()[name = string("op_9969_interleave_0"), val = bool(false)]; + tensor var_9969_cast_fp16 = concat(axis = var_9847, interleave = var_9969_interleave_0, values = (var_9967_cast_fp16, var_9959_cast_fp16))[name = string("op_9969_cast_fp16")]; + tensor var_9970_cast_fp16 = mul(x = var_9969_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9970_cast_fp16")]; + tensor mh_q_153_cast_fp16 = add(x = var_9954_cast_fp16, y = var_9970_cast_fp16)[name = string("mh_q_153_cast_fp16")]; + tensor var_9972_cast_fp16 = mul(x = mh_k_101_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9972_cast_fp16")]; + tensor var_9977_begin_0 = const()[name = string("op_9977_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9977_end_0 = const()[name = string("op_9977_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_9977_end_mask_0 = const()[name = string("op_9977_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_9977_cast_fp16 = slice_by_index(begin = var_9977_begin_0, end = var_9977_end_0, end_mask = var_9977_end_mask_0, x = mh_k_101_cast_fp16)[name = string("op_9977_cast_fp16")]; + tensor var_9983_begin_0 = const()[name = string("op_9983_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_9983_end_0 = const()[name = string("op_9983_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_9983_end_mask_0 = const()[name = string("op_9983_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_9983_cast_fp16 = slice_by_index(begin = var_9983_begin_0, end = var_9983_end_0, end_mask = var_9983_end_mask_0, x = mh_k_101_cast_fp16)[name = string("op_9983_cast_fp16")]; + fp16 const_595_promoted_to_fp16 = const()[name = string("const_595_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9985_cast_fp16 = mul(x = var_9983_cast_fp16, y = const_595_promoted_to_fp16)[name = string("op_9985_cast_fp16")]; + bool var_9987_interleave_0 = const()[name = string("op_9987_interleave_0"), val = bool(false)]; + tensor var_9987_cast_fp16 = concat(axis = var_9847, interleave = var_9987_interleave_0, values = (var_9985_cast_fp16, var_9977_cast_fp16))[name = string("op_9987_cast_fp16")]; + tensor var_9988_cast_fp16 = mul(x = var_9987_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9988_cast_fp16")]; + tensor mh_k_103_cast_fp16 = add(x = var_9972_cast_fp16, y = var_9988_cast_fp16)[name = string("mh_k_103_cast_fp16")]; + tensor var_9992 = const()[name = string("op_9992"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_103_cast_fp16 = reshape(shape = var_9992, x = mh_k_103_cast_fp16)[name = string("current_key_103_cast_fp16")]; + tensor var_9999_cast_fp16 = mul(x = var_101_cast_fp16_25, y = var_323_cast_fp16)[name = string("op_9999_cast_fp16")]; + tensor var_10000_cast_fp16 = mul(x = current_key_103_cast_fp16, y = var_321_cast_fp16)[name = string("op_10000_cast_fp16")]; + tensor key_153_cast_fp16 = add(x = var_9999_cast_fp16, y = var_10000_cast_fp16)[name = string("key_153_cast_fp16")]; + tensor var_10003_cast_fp16 = mul(x = var_132_cast_fp16_25, y = var_323_cast_fp16)[name = string("op_10003_cast_fp16")]; + tensor var_10004_cast_fp16 = mul(x = current_value_51_cast_fp16, y = var_321_cast_fp16)[name = string("op_10004_cast_fp16")]; + tensor value_101_cast_fp16 = add(x = var_10003_cast_fp16, y = var_10004_cast_fp16)[name = string("value_101_cast_fp16")]; + tensor var_10008 = const()[name = string("op_10008"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_101_cast_fp16 = reshape(shape = var_10008, x = key_153_cast_fp16)[name = string("key_heads_101_cast_fp16")]; + tensor var_10010 = const()[name = string("op_10010"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_101_cast_fp16 = reshape(shape = var_10010, x = value_101_cast_fp16)[name = string("value_heads_101_cast_fp16")]; + tensor var_10013_begin_0 = const()[name = string("op_10013_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10013_end_0 = const()[name = string("op_10013_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10013_end_mask_0 = const()[name = string("op_10013_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10013_cast_fp16 = slice_by_index(begin = var_10013_begin_0, end = var_10013_end_0, end_mask = var_10013_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10013_cast_fp16")]; + tensor var_10017_begin_0 = const()[name = string("op_10017_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10017_end_0 = const()[name = string("op_10017_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10017_end_mask_0 = const()[name = string("op_10017_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10017_cast_fp16 = slice_by_index(begin = var_10017_begin_0, end = var_10017_end_0, end_mask = var_10017_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10017_cast_fp16")]; + tensor var_10029_begin_0 = const()[name = string("op_10029_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_10029_end_0 = const()[name = string("op_10029_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_10029_end_mask_0 = const()[name = string("op_10029_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10029_cast_fp16 = slice_by_index(begin = var_10029_begin_0, end = var_10029_end_0, end_mask = var_10029_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10029_cast_fp16")]; + tensor var_10033_begin_0 = const()[name = string("op_10033_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_10033_end_0 = const()[name = string("op_10033_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_10033_end_mask_0 = const()[name = string("op_10033_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10033_cast_fp16 = slice_by_index(begin = var_10033_begin_0, end = var_10033_end_0, end_mask = var_10033_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10033_cast_fp16")]; + tensor var_10045_begin_0 = const()[name = string("op_10045_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_10045_end_0 = const()[name = string("op_10045_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_10045_end_mask_0 = const()[name = string("op_10045_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10045_cast_fp16 = slice_by_index(begin = var_10045_begin_0, end = var_10045_end_0, end_mask = var_10045_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10045_cast_fp16")]; + tensor var_10049_begin_0 = const()[name = string("op_10049_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_10049_end_0 = const()[name = string("op_10049_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_10049_end_mask_0 = const()[name = string("op_10049_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10049_cast_fp16 = slice_by_index(begin = var_10049_begin_0, end = var_10049_end_0, end_mask = var_10049_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10049_cast_fp16")]; + tensor var_10061_begin_0 = const()[name = string("op_10061_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_10061_end_0 = const()[name = string("op_10061_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_10061_end_mask_0 = const()[name = string("op_10061_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10061_cast_fp16 = slice_by_index(begin = var_10061_begin_0, end = var_10061_end_0, end_mask = var_10061_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10061_cast_fp16")]; + tensor var_10065_begin_0 = const()[name = string("op_10065_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_10065_end_0 = const()[name = string("op_10065_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_10065_end_mask_0 = const()[name = string("op_10065_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10065_cast_fp16 = slice_by_index(begin = var_10065_begin_0, end = var_10065_end_0, end_mask = var_10065_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10065_cast_fp16")]; + tensor var_10077_begin_0 = const()[name = string("op_10077_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_10077_end_0 = const()[name = string("op_10077_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_10077_end_mask_0 = const()[name = string("op_10077_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10077_cast_fp16 = slice_by_index(begin = var_10077_begin_0, end = var_10077_end_0, end_mask = var_10077_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10077_cast_fp16")]; + tensor var_10081_begin_0 = const()[name = string("op_10081_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_10081_end_0 = const()[name = string("op_10081_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_10081_end_mask_0 = const()[name = string("op_10081_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10081_cast_fp16 = slice_by_index(begin = var_10081_begin_0, end = var_10081_end_0, end_mask = var_10081_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10081_cast_fp16")]; + tensor var_10093_begin_0 = const()[name = string("op_10093_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_10093_end_0 = const()[name = string("op_10093_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_10093_end_mask_0 = const()[name = string("op_10093_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10093_cast_fp16 = slice_by_index(begin = var_10093_begin_0, end = var_10093_end_0, end_mask = var_10093_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10093_cast_fp16")]; + tensor var_10097_begin_0 = const()[name = string("op_10097_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_10097_end_0 = const()[name = string("op_10097_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_10097_end_mask_0 = const()[name = string("op_10097_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10097_cast_fp16 = slice_by_index(begin = var_10097_begin_0, end = var_10097_end_0, end_mask = var_10097_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10097_cast_fp16")]; + tensor var_10109_begin_0 = const()[name = string("op_10109_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_10109_end_0 = const()[name = string("op_10109_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_10109_end_mask_0 = const()[name = string("op_10109_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10109_cast_fp16 = slice_by_index(begin = var_10109_begin_0, end = var_10109_end_0, end_mask = var_10109_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10109_cast_fp16")]; + tensor var_10113_begin_0 = const()[name = string("op_10113_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_10113_end_0 = const()[name = string("op_10113_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_10113_end_mask_0 = const()[name = string("op_10113_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10113_cast_fp16 = slice_by_index(begin = var_10113_begin_0, end = var_10113_end_0, end_mask = var_10113_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10113_cast_fp16")]; + tensor var_10125_begin_0 = const()[name = string("op_10125_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_10125_end_0 = const()[name = string("op_10125_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10125_end_mask_0 = const()[name = string("op_10125_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10125_cast_fp16 = slice_by_index(begin = var_10125_begin_0, end = var_10125_end_0, end_mask = var_10125_end_mask_0, x = key_heads_101_cast_fp16)[name = string("op_10125_cast_fp16")]; + tensor var_10129_begin_0 = const()[name = string("op_10129_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_10129_end_0 = const()[name = string("op_10129_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10129_end_mask_0 = const()[name = string("op_10129_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10129_cast_fp16 = slice_by_index(begin = var_10129_begin_0, end = var_10129_end_0, end_mask = var_10129_end_mask_0, x = value_heads_101_cast_fp16)[name = string("op_10129_cast_fp16")]; + bool key_heads_103_interleave_0 = const()[name = string("key_heads_103_interleave_0"), val = bool(false)]; + tensor key_heads_103_cast_fp16 = concat(axis = var_9855, interleave = key_heads_103_interleave_0, values = (var_10013_cast_fp16, var_10013_cast_fp16, var_10029_cast_fp16, var_10029_cast_fp16, var_10045_cast_fp16, var_10045_cast_fp16, var_10061_cast_fp16, var_10061_cast_fp16, var_10077_cast_fp16, var_10077_cast_fp16, var_10093_cast_fp16, var_10093_cast_fp16, var_10109_cast_fp16, var_10109_cast_fp16, var_10125_cast_fp16, var_10125_cast_fp16))[name = string("key_heads_103_cast_fp16")]; + bool value_heads_103_interleave_0 = const()[name = string("value_heads_103_interleave_0"), val = bool(false)]; + tensor value_heads_103_cast_fp16 = concat(axis = var_9855, interleave = value_heads_103_interleave_0, values = (var_10017_cast_fp16, var_10017_cast_fp16, var_10033_cast_fp16, var_10033_cast_fp16, var_10049_cast_fp16, var_10049_cast_fp16, var_10065_cast_fp16, var_10065_cast_fp16, var_10081_cast_fp16, var_10081_cast_fp16, var_10097_cast_fp16, var_10097_cast_fp16, var_10113_cast_fp16, var_10113_cast_fp16, var_10129_cast_fp16, var_10129_cast_fp16))[name = string("value_heads_103_cast_fp16")]; + fp16 var_10152_to_fp16 = const()[name = string("op_10152_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_10153_cast_fp16 = mul(x = mh_q_153_cast_fp16, y = var_10152_to_fp16)[name = string("op_10153_cast_fp16")]; + bool mh_w_101_transpose_x_0 = const()[name = string("mh_w_101_transpose_x_0"), val = bool(true)]; + bool mh_w_101_transpose_y_0 = const()[name = string("mh_w_101_transpose_y_0"), val = bool(false)]; + tensor mh_w_101_cast_fp16 = matmul(transpose_x = mh_w_101_transpose_x_0, transpose_y = mh_w_101_transpose_y_0, x = var_10153_cast_fp16, y = key_heads_103_cast_fp16)[name = string("mh_w_101_cast_fp16")]; + tensor mh_w_103_cast_fp16 = add(x = mh_w_101_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_103_cast_fp16")]; + tensor var_10165_cast_fp16 = softmax(axis = var_9837, x = mh_w_103_cast_fp16)[name = string("op_10165_cast_fp16")]; + bool attn_51_transpose_x_0 = const()[name = string("attn_51_transpose_x_0"), val = bool(false)]; + bool attn_51_transpose_y_0 = const()[name = string("attn_51_transpose_y_0"), val = bool(true)]; + tensor attn_51_cast_fp16 = matmul(transpose_x = attn_51_transpose_x_0, transpose_y = attn_51_transpose_y_0, x = value_heads_103_cast_fp16, y = var_10165_cast_fp16)[name = string("attn_51_cast_fp16")]; + tensor var_10170 = const()[name = string("op_10170"), val = tensor([1, -1, 1, 1])]; + tensor input_201_cast_fp16 = reshape(shape = var_10170, x = attn_51_cast_fp16)[name = string("input_201_cast_fp16")]; + string obj_211_pad_type_0 = const()[name = string("obj_211_pad_type_0"), val = string("valid")]; + tensor obj_211_strides_0 = const()[name = string("obj_211_strides_0"), val = tensor([1, 1])]; + tensor obj_211_pad_0 = const()[name = string("obj_211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_211_dilations_0 = const()[name = string("obj_211_dilations_0"), val = tensor([1, 1])]; + int32 obj_211_groups_0 = const()[name = string("obj_211_groups_0"), val = int32(1)]; + tensor layers_25_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397722112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399819328))))[name = string("layers_25_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_211_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_211_dilations_0, groups = obj_211_groups_0, pad = obj_211_pad_0, pad_type = obj_211_pad_type_0, strides = obj_211_strides_0, weight = layers_25_self_attn_o_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("obj_211_cast_fp16")]; + tensor inputs_205_cast_fp16 = add(x = inputs_199_cast_fp16, y = obj_211_cast_fp16)[name = string("inputs_205_cast_fp16")]; + tensor inputs_sq_207_cast_fp16 = mul(x = inputs_205_cast_fp16, y = inputs_205_cast_fp16)[name = string("inputs_sq_207_cast_fp16")]; + tensor variance_207_axes_0 = const()[name = string("variance_207_axes_0"), val = tensor([1])]; + bool variance_207_keep_dims_0 = const()[name = string("variance_207_keep_dims_0"), val = bool(true)]; + tensor variance_207_cast_fp16 = reduce_mean(axes = variance_207_axes_0, keep_dims = variance_207_keep_dims_0, x = inputs_sq_207_cast_fp16)[name = string("variance_207_cast_fp16")]; + fp16 var_10188_to_fp16 = const()[name = string("op_10188_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10189_cast_fp16 = add(x = variance_207_cast_fp16, y = var_10188_to_fp16)[name = string("op_10189_cast_fp16")]; + fp32 var_10190_epsilon_0 = const()[name = string("op_10190_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10190_cast_fp16 = rsqrt(epsilon = var_10190_epsilon_0, x = var_10189_cast_fp16)[name = string("op_10190_cast_fp16")]; + tensor hidden_states_257_cast_fp16 = mul(x = inputs_205_cast_fp16, y = var_10190_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; + tensor w_207_to_fp16 = const()[name = string("w_207_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399819904)))]; + tensor input_203_cast_fp16 = mul(x = w_207_to_fp16, y = hidden_states_257_cast_fp16)[name = string("input_203_cast_fp16")]; + string input_205_pad_type_0 = const()[name = string("input_205_pad_type_0"), val = string("valid")]; + tensor input_205_strides_0 = const()[name = string("input_205_strides_0"), val = tensor([1, 1])]; + tensor input_205_pad_0 = const()[name = string("input_205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_205_dilations_0 = const()[name = string("input_205_dilations_0"), val = tensor([1, 1])]; + int32 input_205_groups_0 = const()[name = string("input_205_groups_0"), val = int32(1)]; + tensor layers_25_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399822016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402967808))))[name = string("layers_25_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_205_cast_fp16 = conv(dilations = input_205_dilations_0, groups = input_205_groups_0, pad = input_205_pad_0, pad_type = input_205_pad_type_0, strides = input_205_strides_0, weight = layers_25_mlp_gate_proj_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = string("input_205_cast_fp16")]; + tensor var_10204_cast_fp16 = silu(x = input_205_cast_fp16)[name = string("op_10204_cast_fp16")]; + string var_10210_pad_type_0 = const()[name = string("op_10210_pad_type_0"), val = string("valid")]; + tensor var_10210_strides_0 = const()[name = string("op_10210_strides_0"), val = tensor([1, 1])]; + tensor var_10210_pad_0 = const()[name = string("op_10210_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10210_dilations_0 = const()[name = string("op_10210_dilations_0"), val = tensor([1, 1])]; + int32 var_10210_groups_0 = const()[name = string("op_10210_groups_0"), val = int32(1)]; + tensor layers_25_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402968384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406114176))))[name = string("layers_25_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_10210_cast_fp16 = conv(dilations = var_10210_dilations_0, groups = var_10210_groups_0, pad = var_10210_pad_0, pad_type = var_10210_pad_type_0, strides = var_10210_strides_0, weight = layers_25_mlp_up_proj_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = string("op_10210_cast_fp16")]; + tensor input_207_cast_fp16 = mul(x = var_10204_cast_fp16, y = var_10210_cast_fp16)[name = string("input_207_cast_fp16")]; + string hidden_states_259_pad_type_0 = const()[name = string("hidden_states_259_pad_type_0"), val = string("valid")]; + tensor hidden_states_259_strides_0 = const()[name = string("hidden_states_259_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_259_pad_0 = const()[name = string("hidden_states_259_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_259_dilations_0 = const()[name = string("hidden_states_259_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_259_groups_0 = const()[name = string("hidden_states_259_groups_0"), val = int32(1)]; + tensor layers_25_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406114752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409260544))))[name = string("layers_25_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_259_cast_fp16 = conv(dilations = hidden_states_259_dilations_0, groups = hidden_states_259_groups_0, pad = hidden_states_259_pad_0, pad_type = hidden_states_259_pad_type_0, strides = hidden_states_259_strides_0, weight = layers_25_mlp_down_proj_weight_to_fp16_palettized, x = input_207_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + tensor inputs_207_cast_fp16 = add(x = inputs_205_cast_fp16, y = hidden_states_259_cast_fp16)[name = string("inputs_207_cast_fp16")]; + int32 var_10224 = const()[name = string("op_10224"), val = int32(3)]; + int32 var_10234 = const()[name = string("op_10234"), val = int32(-2)]; + int32 var_10242 = const()[name = string("op_10242"), val = int32(1)]; + tensor inputs_sq_209_cast_fp16 = mul(x = inputs_207_cast_fp16, y = inputs_207_cast_fp16)[name = string("inputs_sq_209_cast_fp16")]; + tensor variance_209_axes_0 = const()[name = string("variance_209_axes_0"), val = tensor([1])]; + bool variance_209_keep_dims_0 = const()[name = string("variance_209_keep_dims_0"), val = bool(true)]; + tensor variance_209_cast_fp16 = reduce_mean(axes = variance_209_axes_0, keep_dims = variance_209_keep_dims_0, x = inputs_sq_209_cast_fp16)[name = string("variance_209_cast_fp16")]; + fp16 var_10254_to_fp16 = const()[name = string("op_10254_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10255_cast_fp16 = add(x = variance_209_cast_fp16, y = var_10254_to_fp16)[name = string("op_10255_cast_fp16")]; + fp32 var_10256_epsilon_0 = const()[name = string("op_10256_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10256_cast_fp16 = rsqrt(epsilon = var_10256_epsilon_0, x = var_10255_cast_fp16)[name = string("op_10256_cast_fp16")]; + tensor hidden_states_261_cast_fp16 = mul(x = inputs_207_cast_fp16, y = var_10256_cast_fp16)[name = string("hidden_states_261_cast_fp16")]; + tensor w_209_to_fp16 = const()[name = string("w_209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409261120)))]; + tensor obj_213_cast_fp16 = mul(x = w_209_to_fp16, y = hidden_states_261_cast_fp16)[name = string("obj_213_cast_fp16")]; + string query_157_pad_type_0 = const()[name = string("query_157_pad_type_0"), val = string("valid")]; + tensor query_157_strides_0 = const()[name = string("query_157_strides_0"), val = tensor([1, 1])]; + tensor query_157_pad_0 = const()[name = string("query_157_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_157_dilations_0 = const()[name = string("query_157_dilations_0"), val = tensor([1, 1])]; + int32 query_157_groups_0 = const()[name = string("query_157_groups_0"), val = int32(1)]; + tensor layers_26_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409263232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411360448))))[name = string("layers_26_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_157_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_157_dilations_0, groups = query_157_groups_0, pad = query_157_pad_0, pad_type = query_157_pad_type_0, strides = query_157_strides_0, weight = layers_26_self_attn_q_proj_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("query_157_cast_fp16")]; + string current_key_105_pad_type_0 = const()[name = string("current_key_105_pad_type_0"), val = string("valid")]; + tensor current_key_105_strides_0 = const()[name = string("current_key_105_strides_0"), val = tensor([1, 1])]; + tensor current_key_105_pad_0 = const()[name = string("current_key_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_105_dilations_0 = const()[name = string("current_key_105_dilations_0"), val = tensor([1, 1])]; + int32 current_key_105_groups_0 = const()[name = string("current_key_105_groups_0"), val = int32(1)]; + tensor layers_26_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411361024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412409664))))[name = string("layers_26_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_105_cast_fp16 = conv(dilations = current_key_105_dilations_0, groups = current_key_105_groups_0, pad = current_key_105_pad_0, pad_type = current_key_105_pad_type_0, strides = current_key_105_strides_0, weight = layers_26_self_attn_k_proj_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("current_key_105_cast_fp16")]; + string current_value_53_pad_type_0 = const()[name = string("current_value_53_pad_type_0"), val = string("valid")]; + tensor current_value_53_strides_0 = const()[name = string("current_value_53_strides_0"), val = tensor([1, 1])]; + tensor current_value_53_pad_0 = const()[name = string("current_value_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_53_dilations_0 = const()[name = string("current_value_53_dilations_0"), val = tensor([1, 1])]; + int32 current_value_53_groups_0 = const()[name = string("current_value_53_groups_0"), val = int32(1)]; + tensor layers_26_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412410240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413458880))))[name = string("layers_26_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_53_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_53_dilations_0, groups = current_value_53_groups_0, pad = current_value_53_pad_0, pad_type = current_value_53_pad_type_0, strides = current_value_53_strides_0, weight = layers_26_self_attn_v_proj_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("current_value_53_cast_fp16")]; + tensor var_10293 = const()[name = string("op_10293"), val = tensor([16, 128, 1, 1])]; + tensor inputs_209_cast_fp16 = reshape(shape = var_10293, x = query_157_cast_fp16)[name = string("inputs_209_cast_fp16")]; + tensor inputs_sq_211_cast_fp16 = mul(x = inputs_209_cast_fp16, y = inputs_209_cast_fp16)[name = string("inputs_sq_211_cast_fp16")]; + tensor variance_211_axes_0 = const()[name = string("variance_211_axes_0"), val = tensor([1])]; + bool variance_211_keep_dims_0 = const()[name = string("variance_211_keep_dims_0"), val = bool(true)]; + tensor variance_211_cast_fp16 = reduce_mean(axes = variance_211_axes_0, keep_dims = variance_211_keep_dims_0, x = inputs_sq_211_cast_fp16)[name = string("variance_211_cast_fp16")]; + fp16 var_10299_to_fp16 = const()[name = string("op_10299_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10300_cast_fp16 = add(x = variance_211_cast_fp16, y = var_10299_to_fp16)[name = string("op_10300_cast_fp16")]; + fp32 var_10301_epsilon_0 = const()[name = string("op_10301_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10301_cast_fp16 = rsqrt(epsilon = var_10301_epsilon_0, x = var_10300_cast_fp16)[name = string("op_10301_cast_fp16")]; + tensor hidden_states_263_cast_fp16 = mul(x = inputs_209_cast_fp16, y = var_10301_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor w_211_to_fp16 = const()[name = string("w_211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413459456)))]; + tensor query_normed_53_cast_fp16 = mul(x = w_211_to_fp16, y = hidden_states_263_cast_fp16)[name = string("query_normed_53_cast_fp16")]; + tensor var_10309 = const()[name = string("op_10309"), val = tensor([8, 128, 1, 1])]; + tensor inputs_211_cast_fp16 = reshape(shape = var_10309, x = current_key_105_cast_fp16)[name = string("inputs_211_cast_fp16")]; + tensor inputs_sq_213_cast_fp16 = mul(x = inputs_211_cast_fp16, y = inputs_211_cast_fp16)[name = string("inputs_sq_213_cast_fp16")]; + tensor variance_213_axes_0 = const()[name = string("variance_213_axes_0"), val = tensor([1])]; + bool variance_213_keep_dims_0 = const()[name = string("variance_213_keep_dims_0"), val = bool(true)]; + tensor variance_213_cast_fp16 = reduce_mean(axes = variance_213_axes_0, keep_dims = variance_213_keep_dims_0, x = inputs_sq_213_cast_fp16)[name = string("variance_213_cast_fp16")]; + fp16 var_10315_to_fp16 = const()[name = string("op_10315_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10316_cast_fp16 = add(x = variance_213_cast_fp16, y = var_10315_to_fp16)[name = string("op_10316_cast_fp16")]; + fp32 var_10317_epsilon_0 = const()[name = string("op_10317_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10317_cast_fp16 = rsqrt(epsilon = var_10317_epsilon_0, x = var_10316_cast_fp16)[name = string("op_10317_cast_fp16")]; + tensor hidden_states_265_cast_fp16 = mul(x = inputs_211_cast_fp16, y = var_10317_cast_fp16)[name = string("hidden_states_265_cast_fp16")]; + tensor w_213_to_fp16 = const()[name = string("w_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413459776)))]; + tensor current_key_normed_53_cast_fp16 = mul(x = w_213_to_fp16, y = hidden_states_265_cast_fp16)[name = string("current_key_normed_53_cast_fp16")]; + tensor var_10335 = const()[name = string("op_10335"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_157_cast_fp16 = reshape(shape = var_10335, x = query_normed_53_cast_fp16)[name = string("mh_q_157_cast_fp16")]; + tensor var_10337 = const()[name = string("op_10337"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_105_cast_fp16 = reshape(shape = var_10337, x = current_key_normed_53_cast_fp16)[name = string("mh_k_105_cast_fp16")]; + tensor var_10341_cast_fp16 = mul(x = mh_q_157_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10341_cast_fp16")]; + tensor var_10346_begin_0 = const()[name = string("op_10346_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10346_end_0 = const()[name = string("op_10346_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_10346_end_mask_0 = const()[name = string("op_10346_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_10346_cast_fp16 = slice_by_index(begin = var_10346_begin_0, end = var_10346_end_0, end_mask = var_10346_end_mask_0, x = mh_q_157_cast_fp16)[name = string("op_10346_cast_fp16")]; + tensor var_10352_begin_0 = const()[name = string("op_10352_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_10352_end_0 = const()[name = string("op_10352_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_10352_end_mask_0 = const()[name = string("op_10352_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10352_cast_fp16 = slice_by_index(begin = var_10352_begin_0, end = var_10352_end_0, end_mask = var_10352_end_mask_0, x = mh_q_157_cast_fp16)[name = string("op_10352_cast_fp16")]; + fp16 const_615_promoted_to_fp16 = const()[name = string("const_615_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10354_cast_fp16 = mul(x = var_10352_cast_fp16, y = const_615_promoted_to_fp16)[name = string("op_10354_cast_fp16")]; + bool var_10356_interleave_0 = const()[name = string("op_10356_interleave_0"), val = bool(false)]; + tensor var_10356_cast_fp16 = concat(axis = var_10234, interleave = var_10356_interleave_0, values = (var_10354_cast_fp16, var_10346_cast_fp16))[name = string("op_10356_cast_fp16")]; + tensor var_10357_cast_fp16 = mul(x = var_10356_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10357_cast_fp16")]; + tensor mh_q_159_cast_fp16 = add(x = var_10341_cast_fp16, y = var_10357_cast_fp16)[name = string("mh_q_159_cast_fp16")]; + tensor var_10359_cast_fp16 = mul(x = mh_k_105_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10359_cast_fp16")]; + tensor var_10364_begin_0 = const()[name = string("op_10364_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10364_end_0 = const()[name = string("op_10364_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_10364_end_mask_0 = const()[name = string("op_10364_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_10364_cast_fp16 = slice_by_index(begin = var_10364_begin_0, end = var_10364_end_0, end_mask = var_10364_end_mask_0, x = mh_k_105_cast_fp16)[name = string("op_10364_cast_fp16")]; + tensor var_10370_begin_0 = const()[name = string("op_10370_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_10370_end_0 = const()[name = string("op_10370_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_10370_end_mask_0 = const()[name = string("op_10370_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10370_cast_fp16 = slice_by_index(begin = var_10370_begin_0, end = var_10370_end_0, end_mask = var_10370_end_mask_0, x = mh_k_105_cast_fp16)[name = string("op_10370_cast_fp16")]; + fp16 const_618_promoted_to_fp16 = const()[name = string("const_618_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10372_cast_fp16 = mul(x = var_10370_cast_fp16, y = const_618_promoted_to_fp16)[name = string("op_10372_cast_fp16")]; + bool var_10374_interleave_0 = const()[name = string("op_10374_interleave_0"), val = bool(false)]; + tensor var_10374_cast_fp16 = concat(axis = var_10234, interleave = var_10374_interleave_0, values = (var_10372_cast_fp16, var_10364_cast_fp16))[name = string("op_10374_cast_fp16")]; + tensor var_10375_cast_fp16 = mul(x = var_10374_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10375_cast_fp16")]; + tensor mh_k_107_cast_fp16 = add(x = var_10359_cast_fp16, y = var_10375_cast_fp16)[name = string("mh_k_107_cast_fp16")]; + tensor var_10379 = const()[name = string("op_10379"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_107_cast_fp16 = reshape(shape = var_10379, x = mh_k_107_cast_fp16)[name = string("current_key_107_cast_fp16")]; + tensor var_10386_cast_fp16 = mul(x = var_101_cast_fp16_26, y = var_323_cast_fp16)[name = string("op_10386_cast_fp16")]; + tensor var_10387_cast_fp16 = mul(x = current_key_107_cast_fp16, y = var_321_cast_fp16)[name = string("op_10387_cast_fp16")]; + tensor key_159_cast_fp16 = add(x = var_10386_cast_fp16, y = var_10387_cast_fp16)[name = string("key_159_cast_fp16")]; + tensor var_10390_cast_fp16 = mul(x = var_132_cast_fp16_26, y = var_323_cast_fp16)[name = string("op_10390_cast_fp16")]; + tensor var_10391_cast_fp16 = mul(x = current_value_53_cast_fp16, y = var_321_cast_fp16)[name = string("op_10391_cast_fp16")]; + tensor value_105_cast_fp16 = add(x = var_10390_cast_fp16, y = var_10391_cast_fp16)[name = string("value_105_cast_fp16")]; + tensor var_10395 = const()[name = string("op_10395"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_105_cast_fp16 = reshape(shape = var_10395, x = key_159_cast_fp16)[name = string("key_heads_105_cast_fp16")]; + tensor var_10397 = const()[name = string("op_10397"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_105_cast_fp16 = reshape(shape = var_10397, x = value_105_cast_fp16)[name = string("value_heads_105_cast_fp16")]; + tensor var_10400_begin_0 = const()[name = string("op_10400_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10400_end_0 = const()[name = string("op_10400_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10400_end_mask_0 = const()[name = string("op_10400_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10400_cast_fp16 = slice_by_index(begin = var_10400_begin_0, end = var_10400_end_0, end_mask = var_10400_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10400_cast_fp16")]; + tensor var_10404_begin_0 = const()[name = string("op_10404_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10404_end_0 = const()[name = string("op_10404_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10404_end_mask_0 = const()[name = string("op_10404_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10404_cast_fp16 = slice_by_index(begin = var_10404_begin_0, end = var_10404_end_0, end_mask = var_10404_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10404_cast_fp16")]; + tensor var_10416_begin_0 = const()[name = string("op_10416_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_10416_end_0 = const()[name = string("op_10416_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_10416_end_mask_0 = const()[name = string("op_10416_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10416_cast_fp16 = slice_by_index(begin = var_10416_begin_0, end = var_10416_end_0, end_mask = var_10416_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10416_cast_fp16")]; + tensor var_10420_begin_0 = const()[name = string("op_10420_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_10420_end_0 = const()[name = string("op_10420_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_10420_end_mask_0 = const()[name = string("op_10420_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10420_cast_fp16 = slice_by_index(begin = var_10420_begin_0, end = var_10420_end_0, end_mask = var_10420_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10420_cast_fp16")]; + tensor var_10432_begin_0 = const()[name = string("op_10432_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_10432_end_0 = const()[name = string("op_10432_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_10432_end_mask_0 = const()[name = string("op_10432_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10432_cast_fp16 = slice_by_index(begin = var_10432_begin_0, end = var_10432_end_0, end_mask = var_10432_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10432_cast_fp16")]; + tensor var_10436_begin_0 = const()[name = string("op_10436_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_10436_end_0 = const()[name = string("op_10436_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_10436_end_mask_0 = const()[name = string("op_10436_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10436_cast_fp16 = slice_by_index(begin = var_10436_begin_0, end = var_10436_end_0, end_mask = var_10436_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10436_cast_fp16")]; + tensor var_10448_begin_0 = const()[name = string("op_10448_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_10448_end_0 = const()[name = string("op_10448_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_10448_end_mask_0 = const()[name = string("op_10448_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10448_cast_fp16 = slice_by_index(begin = var_10448_begin_0, end = var_10448_end_0, end_mask = var_10448_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10448_cast_fp16")]; + tensor var_10452_begin_0 = const()[name = string("op_10452_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_10452_end_0 = const()[name = string("op_10452_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_10452_end_mask_0 = const()[name = string("op_10452_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10452_cast_fp16 = slice_by_index(begin = var_10452_begin_0, end = var_10452_end_0, end_mask = var_10452_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10452_cast_fp16")]; + tensor var_10464_begin_0 = const()[name = string("op_10464_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_10464_end_0 = const()[name = string("op_10464_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_10464_end_mask_0 = const()[name = string("op_10464_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10464_cast_fp16 = slice_by_index(begin = var_10464_begin_0, end = var_10464_end_0, end_mask = var_10464_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10464_cast_fp16")]; + tensor var_10468_begin_0 = const()[name = string("op_10468_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_10468_end_0 = const()[name = string("op_10468_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_10468_end_mask_0 = const()[name = string("op_10468_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10468_cast_fp16 = slice_by_index(begin = var_10468_begin_0, end = var_10468_end_0, end_mask = var_10468_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10468_cast_fp16")]; + tensor var_10480_begin_0 = const()[name = string("op_10480_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_10480_end_0 = const()[name = string("op_10480_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_10480_end_mask_0 = const()[name = string("op_10480_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10480_cast_fp16 = slice_by_index(begin = var_10480_begin_0, end = var_10480_end_0, end_mask = var_10480_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10480_cast_fp16")]; + tensor var_10484_begin_0 = const()[name = string("op_10484_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_10484_end_0 = const()[name = string("op_10484_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_10484_end_mask_0 = const()[name = string("op_10484_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10484_cast_fp16 = slice_by_index(begin = var_10484_begin_0, end = var_10484_end_0, end_mask = var_10484_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10484_cast_fp16")]; + tensor var_10496_begin_0 = const()[name = string("op_10496_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_10496_end_0 = const()[name = string("op_10496_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_10496_end_mask_0 = const()[name = string("op_10496_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10496_cast_fp16 = slice_by_index(begin = var_10496_begin_0, end = var_10496_end_0, end_mask = var_10496_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10496_cast_fp16")]; + tensor var_10500_begin_0 = const()[name = string("op_10500_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_10500_end_0 = const()[name = string("op_10500_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_10500_end_mask_0 = const()[name = string("op_10500_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10500_cast_fp16 = slice_by_index(begin = var_10500_begin_0, end = var_10500_end_0, end_mask = var_10500_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10500_cast_fp16")]; + tensor var_10512_begin_0 = const()[name = string("op_10512_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_10512_end_0 = const()[name = string("op_10512_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10512_end_mask_0 = const()[name = string("op_10512_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10512_cast_fp16 = slice_by_index(begin = var_10512_begin_0, end = var_10512_end_0, end_mask = var_10512_end_mask_0, x = key_heads_105_cast_fp16)[name = string("op_10512_cast_fp16")]; + tensor var_10516_begin_0 = const()[name = string("op_10516_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_10516_end_0 = const()[name = string("op_10516_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10516_end_mask_0 = const()[name = string("op_10516_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10516_cast_fp16 = slice_by_index(begin = var_10516_begin_0, end = var_10516_end_0, end_mask = var_10516_end_mask_0, x = value_heads_105_cast_fp16)[name = string("op_10516_cast_fp16")]; + bool key_heads_107_interleave_0 = const()[name = string("key_heads_107_interleave_0"), val = bool(false)]; + tensor key_heads_107_cast_fp16 = concat(axis = var_10242, interleave = key_heads_107_interleave_0, values = (var_10400_cast_fp16, var_10400_cast_fp16, var_10416_cast_fp16, var_10416_cast_fp16, var_10432_cast_fp16, var_10432_cast_fp16, var_10448_cast_fp16, var_10448_cast_fp16, var_10464_cast_fp16, var_10464_cast_fp16, var_10480_cast_fp16, var_10480_cast_fp16, var_10496_cast_fp16, var_10496_cast_fp16, var_10512_cast_fp16, var_10512_cast_fp16))[name = string("key_heads_107_cast_fp16")]; + bool value_heads_107_interleave_0 = const()[name = string("value_heads_107_interleave_0"), val = bool(false)]; + tensor value_heads_107_cast_fp16 = concat(axis = var_10242, interleave = value_heads_107_interleave_0, values = (var_10404_cast_fp16, var_10404_cast_fp16, var_10420_cast_fp16, var_10420_cast_fp16, var_10436_cast_fp16, var_10436_cast_fp16, var_10452_cast_fp16, var_10452_cast_fp16, var_10468_cast_fp16, var_10468_cast_fp16, var_10484_cast_fp16, var_10484_cast_fp16, var_10500_cast_fp16, var_10500_cast_fp16, var_10516_cast_fp16, var_10516_cast_fp16))[name = string("value_heads_107_cast_fp16")]; + fp16 var_10539_to_fp16 = const()[name = string("op_10539_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_10540_cast_fp16 = mul(x = mh_q_159_cast_fp16, y = var_10539_to_fp16)[name = string("op_10540_cast_fp16")]; + bool mh_w_105_transpose_x_0 = const()[name = string("mh_w_105_transpose_x_0"), val = bool(true)]; + bool mh_w_105_transpose_y_0 = const()[name = string("mh_w_105_transpose_y_0"), val = bool(false)]; + tensor mh_w_105_cast_fp16 = matmul(transpose_x = mh_w_105_transpose_x_0, transpose_y = mh_w_105_transpose_y_0, x = var_10540_cast_fp16, y = key_heads_107_cast_fp16)[name = string("mh_w_105_cast_fp16")]; + tensor mh_w_107_cast_fp16 = add(x = mh_w_105_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_107_cast_fp16")]; + tensor var_10552_cast_fp16 = softmax(axis = var_10224, x = mh_w_107_cast_fp16)[name = string("op_10552_cast_fp16")]; + bool attn_53_transpose_x_0 = const()[name = string("attn_53_transpose_x_0"), val = bool(false)]; + bool attn_53_transpose_y_0 = const()[name = string("attn_53_transpose_y_0"), val = bool(true)]; + tensor attn_53_cast_fp16 = matmul(transpose_x = attn_53_transpose_x_0, transpose_y = attn_53_transpose_y_0, x = value_heads_107_cast_fp16, y = var_10552_cast_fp16)[name = string("attn_53_cast_fp16")]; + tensor var_10557 = const()[name = string("op_10557"), val = tensor([1, -1, 1, 1])]; + tensor input_209_cast_fp16 = reshape(shape = var_10557, x = attn_53_cast_fp16)[name = string("input_209_cast_fp16")]; + string obj_219_pad_type_0 = const()[name = string("obj_219_pad_type_0"), val = string("valid")]; + tensor obj_219_strides_0 = const()[name = string("obj_219_strides_0"), val = tensor([1, 1])]; + tensor obj_219_pad_0 = const()[name = string("obj_219_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_219_dilations_0 = const()[name = string("obj_219_dilations_0"), val = tensor([1, 1])]; + int32 obj_219_groups_0 = const()[name = string("obj_219_groups_0"), val = int32(1)]; + tensor layers_26_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413460096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415557312))))[name = string("layers_26_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_219_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_219_dilations_0, groups = obj_219_groups_0, pad = obj_219_pad_0, pad_type = obj_219_pad_type_0, strides = obj_219_strides_0, weight = layers_26_self_attn_o_proj_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = string("obj_219_cast_fp16")]; + tensor inputs_213_cast_fp16 = add(x = inputs_207_cast_fp16, y = obj_219_cast_fp16)[name = string("inputs_213_cast_fp16")]; + tensor inputs_sq_215_cast_fp16 = mul(x = inputs_213_cast_fp16, y = inputs_213_cast_fp16)[name = string("inputs_sq_215_cast_fp16")]; + tensor variance_215_axes_0 = const()[name = string("variance_215_axes_0"), val = tensor([1])]; + bool variance_215_keep_dims_0 = const()[name = string("variance_215_keep_dims_0"), val = bool(true)]; + tensor variance_215_cast_fp16 = reduce_mean(axes = variance_215_axes_0, keep_dims = variance_215_keep_dims_0, x = inputs_sq_215_cast_fp16)[name = string("variance_215_cast_fp16")]; + fp16 var_10575_to_fp16 = const()[name = string("op_10575_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10576_cast_fp16 = add(x = variance_215_cast_fp16, y = var_10575_to_fp16)[name = string("op_10576_cast_fp16")]; + fp32 var_10577_epsilon_0 = const()[name = string("op_10577_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10577_cast_fp16 = rsqrt(epsilon = var_10577_epsilon_0, x = var_10576_cast_fp16)[name = string("op_10577_cast_fp16")]; + tensor hidden_states_267_cast_fp16 = mul(x = inputs_213_cast_fp16, y = var_10577_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; + tensor w_215_to_fp16 = const()[name = string("w_215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415557888)))]; + tensor input_211_cast_fp16 = mul(x = w_215_to_fp16, y = hidden_states_267_cast_fp16)[name = string("input_211_cast_fp16")]; + string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; + tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; + tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; + int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; + tensor layers_26_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415560000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418705792))))[name = string("layers_26_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_213_cast_fp16 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = layers_26_mlp_gate_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("input_213_cast_fp16")]; + tensor var_10591_cast_fp16 = silu(x = input_213_cast_fp16)[name = string("op_10591_cast_fp16")]; + string var_10597_pad_type_0 = const()[name = string("op_10597_pad_type_0"), val = string("valid")]; + tensor var_10597_strides_0 = const()[name = string("op_10597_strides_0"), val = tensor([1, 1])]; + tensor var_10597_pad_0 = const()[name = string("op_10597_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10597_dilations_0 = const()[name = string("op_10597_dilations_0"), val = tensor([1, 1])]; + int32 var_10597_groups_0 = const()[name = string("op_10597_groups_0"), val = int32(1)]; + tensor layers_26_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421852160))))[name = string("layers_26_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_10597_cast_fp16 = conv(dilations = var_10597_dilations_0, groups = var_10597_groups_0, pad = var_10597_pad_0, pad_type = var_10597_pad_type_0, strides = var_10597_strides_0, weight = layers_26_mlp_up_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("op_10597_cast_fp16")]; + tensor input_215_cast_fp16 = mul(x = var_10591_cast_fp16, y = var_10597_cast_fp16)[name = string("input_215_cast_fp16")]; + string hidden_states_269_pad_type_0 = const()[name = string("hidden_states_269_pad_type_0"), val = string("valid")]; + tensor hidden_states_269_strides_0 = const()[name = string("hidden_states_269_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_269_pad_0 = const()[name = string("hidden_states_269_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_269_dilations_0 = const()[name = string("hidden_states_269_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_269_groups_0 = const()[name = string("hidden_states_269_groups_0"), val = int32(1)]; + tensor layers_26_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421852736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424998528))))[name = string("layers_26_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_269_cast_fp16 = conv(dilations = hidden_states_269_dilations_0, groups = hidden_states_269_groups_0, pad = hidden_states_269_pad_0, pad_type = hidden_states_269_pad_type_0, strides = hidden_states_269_strides_0, weight = layers_26_mlp_down_proj_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; + tensor inputs_215_cast_fp16 = add(x = inputs_213_cast_fp16, y = hidden_states_269_cast_fp16)[name = string("inputs_215_cast_fp16")]; + int32 var_10611 = const()[name = string("op_10611"), val = int32(3)]; + int32 var_10621 = const()[name = string("op_10621"), val = int32(-2)]; + int32 var_10629 = const()[name = string("op_10629"), val = int32(1)]; + tensor inputs_sq_217_cast_fp16 = mul(x = inputs_215_cast_fp16, y = inputs_215_cast_fp16)[name = string("inputs_sq_217_cast_fp16")]; + tensor variance_217_axes_0 = const()[name = string("variance_217_axes_0"), val = tensor([1])]; + bool variance_217_keep_dims_0 = const()[name = string("variance_217_keep_dims_0"), val = bool(true)]; + tensor variance_217_cast_fp16 = reduce_mean(axes = variance_217_axes_0, keep_dims = variance_217_keep_dims_0, x = inputs_sq_217_cast_fp16)[name = string("variance_217_cast_fp16")]; + fp16 var_10641_to_fp16 = const()[name = string("op_10641_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10642_cast_fp16 = add(x = variance_217_cast_fp16, y = var_10641_to_fp16)[name = string("op_10642_cast_fp16")]; + fp32 var_10643_epsilon_0 = const()[name = string("op_10643_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10643_cast_fp16 = rsqrt(epsilon = var_10643_epsilon_0, x = var_10642_cast_fp16)[name = string("op_10643_cast_fp16")]; + tensor hidden_states_271_cast_fp16 = mul(x = inputs_215_cast_fp16, y = var_10643_cast_fp16)[name = string("hidden_states_271_cast_fp16")]; + tensor w_217_to_fp16 = const()[name = string("w_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424999104)))]; + tensor obj_221_cast_fp16 = mul(x = w_217_to_fp16, y = hidden_states_271_cast_fp16)[name = string("obj_221_cast_fp16")]; + string query_163_pad_type_0 = const()[name = string("query_163_pad_type_0"), val = string("valid")]; + tensor query_163_strides_0 = const()[name = string("query_163_strides_0"), val = tensor([1, 1])]; + tensor query_163_pad_0 = const()[name = string("query_163_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_163_dilations_0 = const()[name = string("query_163_dilations_0"), val = tensor([1, 1])]; + int32 query_163_groups_0 = const()[name = string("query_163_groups_0"), val = int32(1)]; + tensor layers_27_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425001216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427098432))))[name = string("layers_27_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_163_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_163_dilations_0, groups = query_163_groups_0, pad = query_163_pad_0, pad_type = query_163_pad_type_0, strides = query_163_strides_0, weight = layers_27_self_attn_q_proj_weight_to_fp16_palettized, x = obj_221_cast_fp16)[name = string("query_163_cast_fp16")]; + string current_key_109_pad_type_0 = const()[name = string("current_key_109_pad_type_0"), val = string("valid")]; + tensor current_key_109_strides_0 = const()[name = string("current_key_109_strides_0"), val = tensor([1, 1])]; + tensor current_key_109_pad_0 = const()[name = string("current_key_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_109_dilations_0 = const()[name = string("current_key_109_dilations_0"), val = tensor([1, 1])]; + int32 current_key_109_groups_0 = const()[name = string("current_key_109_groups_0"), val = int32(1)]; + tensor layers_27_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427099008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428147648))))[name = string("layers_27_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_109_cast_fp16 = conv(dilations = current_key_109_dilations_0, groups = current_key_109_groups_0, pad = current_key_109_pad_0, pad_type = current_key_109_pad_type_0, strides = current_key_109_strides_0, weight = layers_27_self_attn_k_proj_weight_to_fp16_palettized, x = obj_221_cast_fp16)[name = string("current_key_109_cast_fp16")]; + string current_value_pad_type_0 = const()[name = string("current_value_pad_type_0"), val = string("valid")]; + tensor current_value_strides_0 = const()[name = string("current_value_strides_0"), val = tensor([1, 1])]; + tensor current_value_pad_0 = const()[name = string("current_value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_dilations_0 = const()[name = string("current_value_dilations_0"), val = tensor([1, 1])]; + int32 current_value_groups_0 = const()[name = string("current_value_groups_0"), val = int32(1)]; + tensor layers_27_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428148224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429196864))))[name = string("layers_27_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_dilations_0, groups = current_value_groups_0, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = current_value_strides_0, weight = layers_27_self_attn_v_proj_weight_to_fp16_palettized, x = obj_221_cast_fp16)[name = string("current_value_cast_fp16")]; + tensor var_10680 = const()[name = string("op_10680"), val = tensor([16, 128, 1, 1])]; + tensor inputs_217_cast_fp16 = reshape(shape = var_10680, x = query_163_cast_fp16)[name = string("inputs_217_cast_fp16")]; + tensor inputs_sq_219_cast_fp16 = mul(x = inputs_217_cast_fp16, y = inputs_217_cast_fp16)[name = string("inputs_sq_219_cast_fp16")]; + tensor variance_219_axes_0 = const()[name = string("variance_219_axes_0"), val = tensor([1])]; + bool variance_219_keep_dims_0 = const()[name = string("variance_219_keep_dims_0"), val = bool(true)]; + tensor variance_219_cast_fp16 = reduce_mean(axes = variance_219_axes_0, keep_dims = variance_219_keep_dims_0, x = inputs_sq_219_cast_fp16)[name = string("variance_219_cast_fp16")]; + fp16 var_10686_to_fp16 = const()[name = string("op_10686_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10687_cast_fp16 = add(x = variance_219_cast_fp16, y = var_10686_to_fp16)[name = string("op_10687_cast_fp16")]; + fp32 var_10688_epsilon_0 = const()[name = string("op_10688_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10688_cast_fp16 = rsqrt(epsilon = var_10688_epsilon_0, x = var_10687_cast_fp16)[name = string("op_10688_cast_fp16")]; + tensor hidden_states_273_cast_fp16 = mul(x = inputs_217_cast_fp16, y = var_10688_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor w_219_to_fp16 = const()[name = string("w_219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429197440)))]; + tensor query_normed_cast_fp16 = mul(x = w_219_to_fp16, y = hidden_states_273_cast_fp16)[name = string("query_normed_cast_fp16")]; + tensor var_10696 = const()[name = string("op_10696"), val = tensor([8, 128, 1, 1])]; + tensor inputs_219_cast_fp16 = reshape(shape = var_10696, x = current_key_109_cast_fp16)[name = string("inputs_219_cast_fp16")]; + tensor inputs_sq_221_cast_fp16 = mul(x = inputs_219_cast_fp16, y = inputs_219_cast_fp16)[name = string("inputs_sq_221_cast_fp16")]; + tensor variance_221_axes_0 = const()[name = string("variance_221_axes_0"), val = tensor([1])]; + bool variance_221_keep_dims_0 = const()[name = string("variance_221_keep_dims_0"), val = bool(true)]; + tensor variance_221_cast_fp16 = reduce_mean(axes = variance_221_axes_0, keep_dims = variance_221_keep_dims_0, x = inputs_sq_221_cast_fp16)[name = string("variance_221_cast_fp16")]; + fp16 var_10702_to_fp16 = const()[name = string("op_10702_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10703_cast_fp16 = add(x = variance_221_cast_fp16, y = var_10702_to_fp16)[name = string("op_10703_cast_fp16")]; + fp32 var_10704_epsilon_0 = const()[name = string("op_10704_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10704_cast_fp16 = rsqrt(epsilon = var_10704_epsilon_0, x = var_10703_cast_fp16)[name = string("op_10704_cast_fp16")]; + tensor hidden_states_275_cast_fp16 = mul(x = inputs_219_cast_fp16, y = var_10704_cast_fp16)[name = string("hidden_states_275_cast_fp16")]; + tensor w_221_to_fp16 = const()[name = string("w_221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429197760)))]; + tensor current_key_normed_cast_fp16 = mul(x = w_221_to_fp16, y = hidden_states_275_cast_fp16)[name = string("current_key_normed_cast_fp16")]; + tensor var_10722 = const()[name = string("op_10722"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_163_cast_fp16 = reshape(shape = var_10722, x = query_normed_cast_fp16)[name = string("mh_q_163_cast_fp16")]; + tensor var_10724 = const()[name = string("op_10724"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_109_cast_fp16 = reshape(shape = var_10724, x = current_key_normed_cast_fp16)[name = string("mh_k_109_cast_fp16")]; + tensor var_10728_cast_fp16 = mul(x = mh_q_163_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10728_cast_fp16")]; + tensor var_10733_begin_0 = const()[name = string("op_10733_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10733_end_0 = const()[name = string("op_10733_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_10733_end_mask_0 = const()[name = string("op_10733_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_10733_cast_fp16 = slice_by_index(begin = var_10733_begin_0, end = var_10733_end_0, end_mask = var_10733_end_mask_0, x = mh_q_163_cast_fp16)[name = string("op_10733_cast_fp16")]; + tensor var_10739_begin_0 = const()[name = string("op_10739_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_10739_end_0 = const()[name = string("op_10739_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_10739_end_mask_0 = const()[name = string("op_10739_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10739_cast_fp16 = slice_by_index(begin = var_10739_begin_0, end = var_10739_end_0, end_mask = var_10739_end_mask_0, x = mh_q_163_cast_fp16)[name = string("op_10739_cast_fp16")]; + fp16 const_638_promoted_to_fp16 = const()[name = string("const_638_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10741_cast_fp16 = mul(x = var_10739_cast_fp16, y = const_638_promoted_to_fp16)[name = string("op_10741_cast_fp16")]; + bool var_10743_interleave_0 = const()[name = string("op_10743_interleave_0"), val = bool(false)]; + tensor var_10743_cast_fp16 = concat(axis = var_10621, interleave = var_10743_interleave_0, values = (var_10741_cast_fp16, var_10733_cast_fp16))[name = string("op_10743_cast_fp16")]; + tensor var_10744_cast_fp16 = mul(x = var_10743_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10744_cast_fp16")]; + tensor mh_q_165_cast_fp16 = add(x = var_10728_cast_fp16, y = var_10744_cast_fp16)[name = string("mh_q_165_cast_fp16")]; + tensor var_10746_cast_fp16 = mul(x = mh_k_109_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10746_cast_fp16")]; + tensor var_10751_begin_0 = const()[name = string("op_10751_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10751_end_0 = const()[name = string("op_10751_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_10751_end_mask_0 = const()[name = string("op_10751_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_10751_cast_fp16 = slice_by_index(begin = var_10751_begin_0, end = var_10751_end_0, end_mask = var_10751_end_mask_0, x = mh_k_109_cast_fp16)[name = string("op_10751_cast_fp16")]; + tensor var_10757_begin_0 = const()[name = string("op_10757_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_10757_end_0 = const()[name = string("op_10757_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_10757_end_mask_0 = const()[name = string("op_10757_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10757_cast_fp16 = slice_by_index(begin = var_10757_begin_0, end = var_10757_end_0, end_mask = var_10757_end_mask_0, x = mh_k_109_cast_fp16)[name = string("op_10757_cast_fp16")]; + fp16 const_641_promoted_to_fp16 = const()[name = string("const_641_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10759_cast_fp16 = mul(x = var_10757_cast_fp16, y = const_641_promoted_to_fp16)[name = string("op_10759_cast_fp16")]; + bool var_10761_interleave_0 = const()[name = string("op_10761_interleave_0"), val = bool(false)]; + tensor var_10761_cast_fp16 = concat(axis = var_10621, interleave = var_10761_interleave_0, values = (var_10759_cast_fp16, var_10751_cast_fp16))[name = string("op_10761_cast_fp16")]; + tensor var_10762_cast_fp16 = mul(x = var_10761_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10762_cast_fp16")]; + tensor mh_k_cast_fp16 = add(x = var_10746_cast_fp16, y = var_10762_cast_fp16)[name = string("mh_k_cast_fp16")]; + tensor var_10766 = const()[name = string("op_10766"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_cast_fp16 = reshape(shape = var_10766, x = mh_k_cast_fp16)[name = string("current_key_cast_fp16")]; + tensor var_10773_cast_fp16 = mul(x = var_101_cast_fp16_27, y = var_323_cast_fp16)[name = string("op_10773_cast_fp16")]; + tensor var_10774_cast_fp16 = mul(x = current_key_cast_fp16, y = var_321_cast_fp16)[name = string("op_10774_cast_fp16")]; + tensor key_165_cast_fp16 = add(x = var_10773_cast_fp16, y = var_10774_cast_fp16)[name = string("key_165_cast_fp16")]; + tensor var_10777_cast_fp16 = mul(x = var_132_cast_fp16_27, y = var_323_cast_fp16)[name = string("op_10777_cast_fp16")]; + tensor var_10778_cast_fp16 = mul(x = current_value_cast_fp16, y = var_321_cast_fp16)[name = string("op_10778_cast_fp16")]; + tensor value_109_cast_fp16 = add(x = var_10777_cast_fp16, y = var_10778_cast_fp16)[name = string("value_109_cast_fp16")]; + tensor var_10782 = const()[name = string("op_10782"), val = tensor([1, 8, 128, 256])]; + tensor key_heads_109_cast_fp16 = reshape(shape = var_10782, x = key_165_cast_fp16)[name = string("key_heads_109_cast_fp16")]; + tensor var_10784 = const()[name = string("op_10784"), val = tensor([1, 8, 128, 256])]; + tensor value_heads_109_cast_fp16 = reshape(shape = var_10784, x = value_109_cast_fp16)[name = string("value_heads_109_cast_fp16")]; + tensor var_10787_begin_0 = const()[name = string("op_10787_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10787_end_0 = const()[name = string("op_10787_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10787_end_mask_0 = const()[name = string("op_10787_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10787_cast_fp16 = slice_by_index(begin = var_10787_begin_0, end = var_10787_end_0, end_mask = var_10787_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10787_cast_fp16")]; + tensor var_10791_begin_0 = const()[name = string("op_10791_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10791_end_0 = const()[name = string("op_10791_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10791_end_mask_0 = const()[name = string("op_10791_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10791_cast_fp16 = slice_by_index(begin = var_10791_begin_0, end = var_10791_end_0, end_mask = var_10791_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10791_cast_fp16")]; + tensor var_10803_begin_0 = const()[name = string("op_10803_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_10803_end_0 = const()[name = string("op_10803_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_10803_end_mask_0 = const()[name = string("op_10803_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10803_cast_fp16 = slice_by_index(begin = var_10803_begin_0, end = var_10803_end_0, end_mask = var_10803_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10803_cast_fp16")]; + tensor var_10807_begin_0 = const()[name = string("op_10807_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_10807_end_0 = const()[name = string("op_10807_end_0"), val = tensor([1, 2, 128, 256])]; + tensor var_10807_end_mask_0 = const()[name = string("op_10807_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10807_cast_fp16 = slice_by_index(begin = var_10807_begin_0, end = var_10807_end_0, end_mask = var_10807_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10807_cast_fp16")]; + tensor var_10819_begin_0 = const()[name = string("op_10819_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_10819_end_0 = const()[name = string("op_10819_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_10819_end_mask_0 = const()[name = string("op_10819_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10819_cast_fp16 = slice_by_index(begin = var_10819_begin_0, end = var_10819_end_0, end_mask = var_10819_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10819_cast_fp16")]; + tensor var_10823_begin_0 = const()[name = string("op_10823_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_10823_end_0 = const()[name = string("op_10823_end_0"), val = tensor([1, 3, 128, 256])]; + tensor var_10823_end_mask_0 = const()[name = string("op_10823_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10823_cast_fp16 = slice_by_index(begin = var_10823_begin_0, end = var_10823_end_0, end_mask = var_10823_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10823_cast_fp16")]; + tensor var_10835_begin_0 = const()[name = string("op_10835_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_10835_end_0 = const()[name = string("op_10835_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_10835_end_mask_0 = const()[name = string("op_10835_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10835_cast_fp16 = slice_by_index(begin = var_10835_begin_0, end = var_10835_end_0, end_mask = var_10835_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10835_cast_fp16")]; + tensor var_10839_begin_0 = const()[name = string("op_10839_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_10839_end_0 = const()[name = string("op_10839_end_0"), val = tensor([1, 4, 128, 256])]; + tensor var_10839_end_mask_0 = const()[name = string("op_10839_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10839_cast_fp16 = slice_by_index(begin = var_10839_begin_0, end = var_10839_end_0, end_mask = var_10839_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10839_cast_fp16")]; + tensor var_10851_begin_0 = const()[name = string("op_10851_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_10851_end_0 = const()[name = string("op_10851_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_10851_end_mask_0 = const()[name = string("op_10851_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10851_cast_fp16 = slice_by_index(begin = var_10851_begin_0, end = var_10851_end_0, end_mask = var_10851_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10851_cast_fp16")]; + tensor var_10855_begin_0 = const()[name = string("op_10855_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_10855_end_0 = const()[name = string("op_10855_end_0"), val = tensor([1, 5, 128, 256])]; + tensor var_10855_end_mask_0 = const()[name = string("op_10855_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10855_cast_fp16 = slice_by_index(begin = var_10855_begin_0, end = var_10855_end_0, end_mask = var_10855_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10855_cast_fp16")]; + tensor var_10867_begin_0 = const()[name = string("op_10867_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_10867_end_0 = const()[name = string("op_10867_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_10867_end_mask_0 = const()[name = string("op_10867_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10867_cast_fp16 = slice_by_index(begin = var_10867_begin_0, end = var_10867_end_0, end_mask = var_10867_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10867_cast_fp16")]; + tensor var_10871_begin_0 = const()[name = string("op_10871_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_10871_end_0 = const()[name = string("op_10871_end_0"), val = tensor([1, 6, 128, 256])]; + tensor var_10871_end_mask_0 = const()[name = string("op_10871_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10871_cast_fp16 = slice_by_index(begin = var_10871_begin_0, end = var_10871_end_0, end_mask = var_10871_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10871_cast_fp16")]; + tensor var_10883_begin_0 = const()[name = string("op_10883_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_10883_end_0 = const()[name = string("op_10883_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_10883_end_mask_0 = const()[name = string("op_10883_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10883_cast_fp16 = slice_by_index(begin = var_10883_begin_0, end = var_10883_end_0, end_mask = var_10883_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10883_cast_fp16")]; + tensor var_10887_begin_0 = const()[name = string("op_10887_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_10887_end_0 = const()[name = string("op_10887_end_0"), val = tensor([1, 7, 128, 256])]; + tensor var_10887_end_mask_0 = const()[name = string("op_10887_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10887_cast_fp16 = slice_by_index(begin = var_10887_begin_0, end = var_10887_end_0, end_mask = var_10887_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10887_cast_fp16")]; + tensor var_10899_begin_0 = const()[name = string("op_10899_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_10899_end_0 = const()[name = string("op_10899_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10899_end_mask_0 = const()[name = string("op_10899_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10899_cast_fp16 = slice_by_index(begin = var_10899_begin_0, end = var_10899_end_0, end_mask = var_10899_end_mask_0, x = key_heads_109_cast_fp16)[name = string("op_10899_cast_fp16")]; + tensor var_10903_begin_0 = const()[name = string("op_10903_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_10903_end_0 = const()[name = string("op_10903_end_0"), val = tensor([1, 1, 128, 256])]; + tensor var_10903_end_mask_0 = const()[name = string("op_10903_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_10903_cast_fp16 = slice_by_index(begin = var_10903_begin_0, end = var_10903_end_0, end_mask = var_10903_end_mask_0, x = value_heads_109_cast_fp16)[name = string("op_10903_cast_fp16")]; + bool key_heads_interleave_0 = const()[name = string("key_heads_interleave_0"), val = bool(false)]; + tensor key_heads_cast_fp16 = concat(axis = var_10629, interleave = key_heads_interleave_0, values = (var_10787_cast_fp16, var_10787_cast_fp16, var_10803_cast_fp16, var_10803_cast_fp16, var_10819_cast_fp16, var_10819_cast_fp16, var_10835_cast_fp16, var_10835_cast_fp16, var_10851_cast_fp16, var_10851_cast_fp16, var_10867_cast_fp16, var_10867_cast_fp16, var_10883_cast_fp16, var_10883_cast_fp16, var_10899_cast_fp16, var_10899_cast_fp16))[name = string("key_heads_cast_fp16")]; + bool value_heads_interleave_0 = const()[name = string("value_heads_interleave_0"), val = bool(false)]; + tensor value_heads_cast_fp16 = concat(axis = var_10629, interleave = value_heads_interleave_0, values = (var_10791_cast_fp16, var_10791_cast_fp16, var_10807_cast_fp16, var_10807_cast_fp16, var_10823_cast_fp16, var_10823_cast_fp16, var_10839_cast_fp16, var_10839_cast_fp16, var_10855_cast_fp16, var_10855_cast_fp16, var_10871_cast_fp16, var_10871_cast_fp16, var_10887_cast_fp16, var_10887_cast_fp16, var_10903_cast_fp16, var_10903_cast_fp16))[name = string("value_heads_cast_fp16")]; + fp16 var_10926_to_fp16 = const()[name = string("op_10926_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_10927_cast_fp16 = mul(x = mh_q_165_cast_fp16, y = var_10926_to_fp16)[name = string("op_10927_cast_fp16")]; + bool mh_w_109_transpose_x_0 = const()[name = string("mh_w_109_transpose_x_0"), val = bool(true)]; + bool mh_w_109_transpose_y_0 = const()[name = string("mh_w_109_transpose_y_0"), val = bool(false)]; + tensor mh_w_109_cast_fp16 = matmul(transpose_x = mh_w_109_transpose_x_0, transpose_y = mh_w_109_transpose_y_0, x = var_10927_cast_fp16, y = key_heads_cast_fp16)[name = string("mh_w_109_cast_fp16")]; + tensor mh_w_cast_fp16 = add(x = mh_w_109_cast_fp16, y = var_487_cast_fp16)[name = string("mh_w_cast_fp16")]; + tensor var_10939_cast_fp16 = softmax(axis = var_10611, x = mh_w_cast_fp16)[name = string("op_10939_cast_fp16")]; + bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; + bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = value_heads_cast_fp16, y = var_10939_cast_fp16)[name = string("attn_cast_fp16")]; + tensor var_10944 = const()[name = string("op_10944"), val = tensor([1, -1, 1, 1])]; + tensor input_217_cast_fp16 = reshape(shape = var_10944, x = attn_cast_fp16)[name = string("input_217_cast_fp16")]; + string obj_pad_type_0 = const()[name = string("obj_pad_type_0"), val = string("valid")]; + tensor obj_strides_0 = const()[name = string("obj_strides_0"), val = tensor([1, 1])]; + tensor obj_pad_0 = const()[name = string("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_dilations_0 = const()[name = string("obj_dilations_0"), val = tensor([1, 1])]; + int32 obj_groups_0 = const()[name = string("obj_groups_0"), val = int32(1)]; + tensor layers_27_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429198080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431295296))))[name = string("layers_27_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_27_self_attn_o_proj_weight_to_fp16_palettized, x = input_217_cast_fp16)[name = string("obj_cast_fp16")]; + tensor inputs_221_cast_fp16 = add(x = inputs_215_cast_fp16, y = obj_cast_fp16)[name = string("inputs_221_cast_fp16")]; + tensor inputs_sq_223_cast_fp16 = mul(x = inputs_221_cast_fp16, y = inputs_221_cast_fp16)[name = string("inputs_sq_223_cast_fp16")]; + tensor variance_223_axes_0 = const()[name = string("variance_223_axes_0"), val = tensor([1])]; + bool variance_223_keep_dims_0 = const()[name = string("variance_223_keep_dims_0"), val = bool(true)]; + tensor variance_223_cast_fp16 = reduce_mean(axes = variance_223_axes_0, keep_dims = variance_223_keep_dims_0, x = inputs_sq_223_cast_fp16)[name = string("variance_223_cast_fp16")]; + fp16 var_10962_to_fp16 = const()[name = string("op_10962_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_10963_cast_fp16 = add(x = variance_223_cast_fp16, y = var_10962_to_fp16)[name = string("op_10963_cast_fp16")]; + fp32 var_10964_epsilon_0 = const()[name = string("op_10964_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_10964_cast_fp16 = rsqrt(epsilon = var_10964_epsilon_0, x = var_10963_cast_fp16)[name = string("op_10964_cast_fp16")]; + tensor hidden_states_277_cast_fp16 = mul(x = inputs_221_cast_fp16, y = var_10964_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + tensor w_223_to_fp16 = const()[name = string("w_223_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431295872)))]; + tensor input_219_cast_fp16 = mul(x = w_223_to_fp16, y = hidden_states_277_cast_fp16)[name = string("input_219_cast_fp16")]; + string input_221_pad_type_0 = const()[name = string("input_221_pad_type_0"), val = string("valid")]; + tensor input_221_strides_0 = const()[name = string("input_221_strides_0"), val = tensor([1, 1])]; + tensor input_221_pad_0 = const()[name = string("input_221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_221_dilations_0 = const()[name = string("input_221_dilations_0"), val = tensor([1, 1])]; + int32 input_221_groups_0 = const()[name = string("input_221_groups_0"), val = int32(1)]; + tensor layers_27_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431297984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434443776))))[name = string("layers_27_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_221_cast_fp16 = conv(dilations = input_221_dilations_0, groups = input_221_groups_0, pad = input_221_pad_0, pad_type = input_221_pad_type_0, strides = input_221_strides_0, weight = layers_27_mlp_gate_proj_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = string("input_221_cast_fp16")]; + tensor var_10978_cast_fp16 = silu(x = input_221_cast_fp16)[name = string("op_10978_cast_fp16")]; + string var_10984_pad_type_0 = const()[name = string("op_10984_pad_type_0"), val = string("valid")]; + tensor var_10984_strides_0 = const()[name = string("op_10984_strides_0"), val = tensor([1, 1])]; + tensor var_10984_pad_0 = const()[name = string("op_10984_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10984_dilations_0 = const()[name = string("op_10984_dilations_0"), val = tensor([1, 1])]; + int32 var_10984_groups_0 = const()[name = string("op_10984_groups_0"), val = int32(1)]; + tensor layers_27_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434444352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437590144))))[name = string("layers_27_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_10984_cast_fp16 = conv(dilations = var_10984_dilations_0, groups = var_10984_groups_0, pad = var_10984_pad_0, pad_type = var_10984_pad_type_0, strides = var_10984_strides_0, weight = layers_27_mlp_up_proj_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = string("op_10984_cast_fp16")]; + tensor input_223_cast_fp16 = mul(x = var_10978_cast_fp16, y = var_10984_cast_fp16)[name = string("input_223_cast_fp16")]; + string hidden_states_279_pad_type_0 = const()[name = string("hidden_states_279_pad_type_0"), val = string("valid")]; + tensor hidden_states_279_strides_0 = const()[name = string("hidden_states_279_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_279_pad_0 = const()[name = string("hidden_states_279_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_279_dilations_0 = const()[name = string("hidden_states_279_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_279_groups_0 = const()[name = string("hidden_states_279_groups_0"), val = int32(1)]; + tensor layers_27_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437590720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440736512))))[name = string("layers_27_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_279_cast_fp16 = conv(dilations = hidden_states_279_dilations_0, groups = hidden_states_279_groups_0, pad = hidden_states_279_pad_0, pad_type = hidden_states_279_pad_type_0, strides = hidden_states_279_strides_0, weight = layers_27_mlp_down_proj_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = string("hidden_states_279_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_221_cast_fp16, y = hidden_states_279_cast_fp16)[name = string("inputs_cast_fp16")]; + tensor inputs_sq_cast_fp16 = mul(x = inputs_cast_fp16, y = inputs_cast_fp16)[name = string("inputs_sq_cast_fp16")]; + tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([1])]; + bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = inputs_sq_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_11005_to_fp16 = const()[name = string("op_11005_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_11006_cast_fp16 = add(x = variance_cast_fp16, y = var_11005_to_fp16)[name = string("op_11006_cast_fp16")]; + fp32 var_11007_epsilon_0 = const()[name = string("op_11007_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_11007_cast_fp16 = rsqrt(epsilon = var_11007_epsilon_0, x = var_11006_cast_fp16)[name = string("op_11007_cast_fp16")]; + tensor hidden_states_cast_fp16 = mul(x = inputs_cast_fp16, y = var_11007_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor w_to_fp16 = const()[name = string("w_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440737088)))]; + tensor hidden_states = mul(x = w_to_fp16, y = hidden_states_cast_fp16)[name = string("input_cast_fp16")]; + string logits_pad_type_0 = const()[name = string("logits_pad_type_0"), val = string("valid")]; + tensor logits_strides_0 = const()[name = string("logits_strides_0"), val = tensor([1, 1])]; + tensor logits_pad_0 = const()[name = string("logits_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_dilations_0 = const()[name = string("logits_dilations_0"), val = tensor([1, 1])]; + int32 logits_groups_0 = const()[name = string("logits_groups_0"), val = int32(1)]; + tensor codec_head_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440739200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443884992))))[name = string("codec_head_weight_to_fp16_palettized")]; + tensor logits_cast_fp16 = conv(dilations = logits_dilations_0, groups = logits_groups_0, pad = logits_pad_0, pad_type = logits_pad_type_0, strides = logits_strides_0, weight = codec_head_weight_to_fp16_palettized, x = hidden_states)[name = string("logits_cast_fp16")]; + tensor var_11024_axes_0 = const()[name = string("op_11024_axes_0"), val = tensor([3])]; + tensor var_11024_cast_fp16 = squeeze(axes = var_11024_axes_0, x = logits_cast_fp16)[name = string("op_11024_cast_fp16")]; + tensor var_11027_perm_0 = const()[name = string("op_11027_perm_0"), val = tensor([0, 2, 1])]; + int32 var_11029 = const()[name = string("op_11029"), val = int32(1)]; + bool var_11030_interleave_0 = const()[name = string("op_11030_interleave_0"), val = bool(false)]; + tensor key_cache_updates = concat(axis = var_11029, interleave = var_11030_interleave_0, values = (current_key_3_cast_fp16, current_key_7_cast_fp16, current_key_11_cast_fp16, current_key_15_cast_fp16, current_key_19_cast_fp16, current_key_23_cast_fp16, current_key_27_cast_fp16, current_key_31_cast_fp16, current_key_35_cast_fp16, current_key_39_cast_fp16, current_key_43_cast_fp16, current_key_47_cast_fp16, current_key_51_cast_fp16, current_key_55_cast_fp16, current_key_59_cast_fp16, current_key_63_cast_fp16, current_key_67_cast_fp16, current_key_71_cast_fp16, current_key_75_cast_fp16, current_key_79_cast_fp16, current_key_83_cast_fp16, current_key_87_cast_fp16, current_key_91_cast_fp16, current_key_95_cast_fp16, current_key_99_cast_fp16, current_key_103_cast_fp16, current_key_107_cast_fp16, current_key_cast_fp16))[name = string("op_11030_cast_fp16")]; + int32 var_11032 = const()[name = string("op_11032"), val = int32(1)]; + bool var_11033_interleave_0 = const()[name = string("op_11033_interleave_0"), val = bool(false)]; + tensor value_cache_updates = concat(axis = var_11032, interleave = var_11033_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_23_cast_fp16, current_value_25_cast_fp16, current_value_27_cast_fp16, current_value_29_cast_fp16, current_value_31_cast_fp16, current_value_33_cast_fp16, current_value_35_cast_fp16, current_value_37_cast_fp16, current_value_39_cast_fp16, current_value_41_cast_fp16, current_value_43_cast_fp16, current_value_45_cast_fp16, current_value_47_cast_fp16, current_value_49_cast_fp16, current_value_51_cast_fp16, current_value_53_cast_fp16, current_value_cast_fp16))[name = string("op_11033_cast_fp16")]; + tensor logits = transpose(perm = var_11027_perm_0, x = var_11024_cast_fp16)[name = string("transpose_0")]; + } -> (logits, hidden_states, key_cache_updates, value_cache_updates); +} \ No newline at end of file