diff --git "a/qwen3_tts/multi_code_decoder/12hz-1.7b-customvoice/W8A16/MultiCodeDecoder.mlmodelc/model.mil" "b/qwen3_tts/multi_code_decoder/12hz-1.7b-customvoice/W8A16/MultiCodeDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/qwen3_tts/multi_code_decoder/12hz-1.7b-customvoice/W8A16/MultiCodeDecoder.mlmodelc/model.mil" @@ -0,0 +1,1377 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { + string inputs_1_pad_type_0 = const()[name = string("inputs_1_pad_type_0"), val = string("valid")]; + tensor inputs_1_strides_0 = const()[name = string("inputs_1_strides_0"), val = tensor([1, 1])]; + tensor inputs_1_pad_0 = const()[name = string("inputs_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor inputs_1_dilations_0 = const()[name = string("inputs_1_dilations_0"), val = tensor([1, 1])]; + int32 inputs_1_groups_0 = const()[name = string("inputs_1_groups_0"), val = int32(1)]; + tensor input_projection_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("input_projection_weight_to_fp16_palettized")]; + tensor input_projection_bias_to_fp16 = const()[name = string("input_projection_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097856)))]; + tensor inputs_1_cast_fp16 = conv(bias = input_projection_bias_to_fp16, dilations = inputs_1_dilations_0, groups = inputs_1_groups_0, pad = inputs_1_pad_0, pad_type = inputs_1_pad_type_0, strides = inputs_1_strides_0, weight = input_projection_weight_to_fp16_palettized, x = input_embeds)[name = string("inputs_1_cast_fp16")]; + int32 pos_cos_batch_dims_0 = const()[name = string("pos_cos_batch_dims_0"), val = int32(0)]; + bool pos_cos_validate_indices_0 = const()[name = string("pos_cos_validate_indices_0"), val = bool(false)]; + tensor position_embeddings_cos_weight_to_fp16 = const()[name = string("position_embeddings_cos_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099968)))]; + string cache_length_to_int16_dtype_0 = const()[name = string("cache_length_to_int16_dtype_0"), val = string("int16")]; + string cast_111_dtype_0 = const()[name = string("cast_111_dtype_0"), val = string("int32")]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor cache_length_to_int16 = cast(dtype = cache_length_to_int16_dtype_0, x = cache_length)[name = string("cast_5")]; + tensor cast_111 = cast(dtype = cast_111_dtype_0, x = cache_length_to_int16)[name = string("cast_4")]; + tensor greater_equal_0 = greater_equal(x = cast_111, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(16)]; + tensor add_0 = add(x = cast_111, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = cast_111, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; + string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; + tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; + tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(16)]; + tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; + tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(0)]; + tensor pos_cos_cast_fp16_cast_uint16_cast_uint16 = gather(axis = pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = pos_cos_batch_dims_0, indices = select_0_1, validate_indices = pos_cos_validate_indices_0, x = position_embeddings_cos_weight_to_fp16)[name = string("pos_cos_cast_fp16_cast_uint16_cast_uint16")]; + tensor obj_7_axes_0 = const()[name = string("obj_7_axes_0"), val = tensor([2])]; + tensor obj_7_cast_fp16 = expand_dims(axes = obj_7_axes_0, x = pos_cos_cast_fp16_cast_uint16_cast_uint16)[name = string("obj_7_cast_fp16")]; + int32 pos_sin_axis_0 = const()[name = string("pos_sin_axis_0"), val = int32(0)]; + int32 pos_sin_batch_dims_0 = const()[name = string("pos_sin_batch_dims_0"), val = int32(0)]; + bool pos_sin_validate_indices_0 = const()[name = string("pos_sin_validate_indices_0"), val = bool(false)]; + tensor position_embeddings_sin_weight_to_fp16 = const()[name = string("position_embeddings_sin_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2104128)))]; + string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")]; + tensor cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_1")]; + tensor pos_sin_cast_fp16_cast_uint16 = gather(axis = pos_sin_axis_0, batch_dims = pos_sin_batch_dims_0, indices = cache_length_to_uint16, validate_indices = pos_sin_validate_indices_0, x = position_embeddings_sin_weight_to_fp16)[name = string("pos_sin_cast_fp16_cast_uint16")]; + tensor obj_9_axes_0 = const()[name = string("obj_9_axes_0"), val = tensor([2])]; + tensor obj_9_cast_fp16 = expand_dims(axes = obj_9_axes_0, x = pos_sin_cast_fp16_cast_uint16)[name = string("obj_9_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1024, 1024, 1024, 1024, 1024])]; + int32 var_96_axis_0 = const()[name = string("op_96_axis_0"), val = int32(1)]; + tensor var_96_cast_fp16_0, tensor var_96_cast_fp16_1, tensor var_96_cast_fp16_2, tensor var_96_cast_fp16_3, tensor var_96_cast_fp16_4 = split(axis = var_96_axis_0, split_sizes = tile_0, x = key_cache)[name = string("op_96_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1024, 1024, 1024, 1024, 1024])]; + int32 var_104_axis_0 = const()[name = string("op_104_axis_0"), val = int32(1)]; + tensor var_104_cast_fp16_0, tensor var_104_cast_fp16_1, tensor var_104_cast_fp16_2, tensor var_104_cast_fp16_3, tensor var_104_cast_fp16_4 = split(axis = var_104_axis_0, split_sizes = tile_1, x = value_cache)[name = string("op_104_cast_fp16")]; + int32 var_111 = const()[name = string("op_111"), val = int32(3)]; + int32 var_121 = const()[name = string("op_121"), val = int32(-2)]; + int32 var_129 = const()[name = string("op_129"), val = int32(1)]; + tensor inputs_sq_1_cast_fp16 = mul(x = inputs_1_cast_fp16, y = inputs_1_cast_fp16)[name = string("inputs_sq_1_cast_fp16")]; + tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([1])]; + bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; + tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = inputs_sq_1_cast_fp16)[name = string("variance_1_cast_fp16")]; + fp16 var_141_to_fp16 = const()[name = string("op_141_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_142_cast_fp16 = add(x = variance_1_cast_fp16, y = var_141_to_fp16)[name = string("op_142_cast_fp16")]; + fp32 var_143_epsilon_0 = const()[name = string("op_143_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_143_cast_fp16 = rsqrt(epsilon = var_143_epsilon_0, x = var_142_cast_fp16)[name = string("op_143_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = mul(x = inputs_1_cast_fp16, y = var_143_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; + tensor w_1_to_fp16 = const()[name = string("w_1_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2108288)))]; + tensor obj_1_cast_fp16 = mul(x = w_1_to_fp16, y = hidden_states_1_cast_fp16)[name = string("obj_1_cast_fp16")]; + string query_1_pad_type_0 = const()[name = string("query_1_pad_type_0"), val = string("valid")]; + tensor query_1_strides_0 = const()[name = string("query_1_strides_0"), val = tensor([1, 1])]; + tensor query_1_pad_0 = const()[name = string("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_1_dilations_0 = const()[name = string("query_1_dilations_0"), val = tensor([1, 1])]; + int32 query_1_groups_0 = const()[name = string("query_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2110400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4207616))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4208192)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("query_1_cast_fp16")]; + string current_key_1_pad_type_0 = const()[name = string("current_key_1_pad_type_0"), val = string("valid")]; + tensor current_key_1_strides_0 = const()[name = string("current_key_1_strides_0"), val = tensor([1, 1])]; + tensor current_key_1_pad_0 = const()[name = string("current_key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_1_dilations_0 = const()[name = string("current_key_1_dilations_0"), val = tensor([1, 1])]; + int32 current_key_1_groups_0 = const()[name = string("current_key_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4212352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5260992))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_1_cast_fp16 = conv(dilations = current_key_1_dilations_0, groups = current_key_1_groups_0, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = current_key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("current_key_1_cast_fp16")]; + string current_value_1_pad_type_0 = const()[name = string("current_value_1_pad_type_0"), val = string("valid")]; + tensor current_value_1_strides_0 = const()[name = string("current_value_1_strides_0"), val = tensor([1, 1])]; + tensor current_value_1_pad_0 = const()[name = string("current_value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_1_dilations_0 = const()[name = string("current_value_1_dilations_0"), val = tensor([1, 1])]; + int32 current_value_1_groups_0 = const()[name = string("current_value_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5261568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6310208))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6310784)))]; + tensor current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_1_dilations_0, groups = current_value_1_groups_0, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = current_value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("current_value_1_cast_fp16")]; + tensor var_180 = const()[name = string("op_180"), val = tensor([16, 128, 1, 1])]; + tensor inputs_3_cast_fp16 = reshape(shape = var_180, x = query_1_cast_fp16)[name = string("inputs_3_cast_fp16")]; + tensor inputs_sq_3_cast_fp16 = mul(x = inputs_3_cast_fp16, y = inputs_3_cast_fp16)[name = string("inputs_sq_3_cast_fp16")]; + tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([1])]; + bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = inputs_sq_3_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_187_cast_fp16 = add(x = variance_3_cast_fp16, y = var_186_to_fp16)[name = string("op_187_cast_fp16")]; + fp32 var_188_epsilon_0 = const()[name = string("op_188_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_188_cast_fp16 = rsqrt(epsilon = var_188_epsilon_0, x = var_187_cast_fp16)[name = string("op_188_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_3_cast_fp16, y = var_188_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor w_3_to_fp16 = const()[name = string("w_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6312896)))]; + tensor query_normed_1_cast_fp16 = mul(x = w_3_to_fp16, y = hidden_states_3_cast_fp16)[name = string("query_normed_1_cast_fp16")]; + tensor var_196 = const()[name = string("op_196"), val = tensor([8, 128, 1, 1])]; + tensor inputs_5_cast_fp16 = reshape(shape = var_196, x = current_key_1_cast_fp16)[name = string("inputs_5_cast_fp16")]; + tensor inputs_sq_5_cast_fp16 = mul(x = inputs_5_cast_fp16, y = inputs_5_cast_fp16)[name = string("inputs_sq_5_cast_fp16")]; + tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([1])]; + bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = inputs_sq_5_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_202_to_fp16 = const()[name = string("op_202_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_203_cast_fp16 = add(x = variance_5_cast_fp16, y = var_202_to_fp16)[name = string("op_203_cast_fp16")]; + fp32 var_204_epsilon_0 = const()[name = string("op_204_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_204_cast_fp16 = rsqrt(epsilon = var_204_epsilon_0, x = var_203_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = mul(x = inputs_5_cast_fp16, y = var_204_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor w_5_to_fp16 = const()[name = string("w_5_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6313216)))]; + tensor current_key_normed_1_cast_fp16 = mul(x = w_5_to_fp16, y = hidden_states_5_cast_fp16)[name = string("current_key_normed_1_cast_fp16")]; + tensor var_222 = const()[name = string("op_222"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_222, x = query_normed_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; + tensor var_224 = const()[name = string("op_224"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_1_cast_fp16 = reshape(shape = var_224, x = current_key_normed_1_cast_fp16)[name = string("mh_k_1_cast_fp16")]; + tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([1])]; + tensor cos_1_cast_fp16 = expand_dims(axes = cos_1_axes_0, x = obj_7_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([1])]; + tensor sin_1_cast_fp16 = expand_dims(axes = sin_1_axes_0, x = obj_9_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor var_228_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_228_cast_fp16")]; + tensor var_233_begin_0 = const()[name = string("op_233_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_233_end_0 = const()[name = string("op_233_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_233_end_mask_0 = const()[name = string("op_233_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_233_cast_fp16 = slice_by_index(begin = var_233_begin_0, end = var_233_end_0, end_mask = var_233_end_mask_0, x = mh_q_1_cast_fp16)[name = string("op_233_cast_fp16")]; + tensor var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_239_end_0 = const()[name = string("op_239_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = mh_q_1_cast_fp16)[name = string("op_239_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_241_cast_fp16 = mul(x = var_239_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_241_cast_fp16")]; + bool var_243_interleave_0 = const()[name = string("op_243_interleave_0"), val = bool(false)]; + tensor var_243_cast_fp16 = concat(axis = var_121, interleave = var_243_interleave_0, values = (var_241_cast_fp16, var_233_cast_fp16))[name = string("op_243_cast_fp16")]; + tensor var_244_cast_fp16 = mul(x = var_243_cast_fp16, y = sin_1_cast_fp16)[name = string("op_244_cast_fp16")]; + tensor mh_q_3_cast_fp16 = add(x = var_228_cast_fp16, y = var_244_cast_fp16)[name = string("mh_q_3_cast_fp16")]; + tensor var_246_cast_fp16 = mul(x = mh_k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_246_cast_fp16")]; + tensor var_251_begin_0 = const()[name = string("op_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_251_end_0 = const()[name = string("op_251_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_251_end_mask_0 = const()[name = string("op_251_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = mh_k_1_cast_fp16)[name = string("op_251_cast_fp16")]; + tensor var_257_begin_0 = const()[name = string("op_257_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_257_end_0 = const()[name = string("op_257_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_257_end_mask_0 = const()[name = string("op_257_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_257_cast_fp16 = slice_by_index(begin = var_257_begin_0, end = var_257_end_0, end_mask = var_257_end_mask_0, x = mh_k_1_cast_fp16)[name = string("op_257_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_259_cast_fp16 = mul(x = var_257_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_259_cast_fp16")]; + bool var_261_interleave_0 = const()[name = string("op_261_interleave_0"), val = bool(false)]; + tensor var_261_cast_fp16 = concat(axis = var_121, interleave = var_261_interleave_0, values = (var_259_cast_fp16, var_251_cast_fp16))[name = string("op_261_cast_fp16")]; + tensor var_262_cast_fp16 = mul(x = var_261_cast_fp16, y = sin_1_cast_fp16)[name = string("op_262_cast_fp16")]; + tensor mh_k_3_cast_fp16 = add(x = var_246_cast_fp16, y = var_262_cast_fp16)[name = string("mh_k_3_cast_fp16")]; + tensor var_266 = const()[name = string("op_266"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_3_cast_fp16 = reshape(shape = var_266, x = mh_k_3_cast_fp16)[name = string("current_key_3_cast_fp16")]; + tensor var_269_axes_0 = const()[name = string("op_269_axes_0"), val = tensor([1])]; + tensor var_269_cast_fp16 = expand_dims(axes = var_269_axes_0, x = kv_cache_update_mask)[name = string("op_269_cast_fp16")]; + tensor var_270_axes_0 = const()[name = string("op_270_axes_0"), val = tensor([2])]; + tensor var_270_cast_fp16 = expand_dims(axes = var_270_axes_0, x = var_269_cast_fp16)[name = string("op_270_cast_fp16")]; + fp16 var_122_to_fp16 = const()[name = string("op_122_to_fp16"), val = fp16(0x1p+0)]; + tensor var_272_cast_fp16 = sub(x = var_122_to_fp16, y = var_270_cast_fp16)[name = string("op_272_cast_fp16")]; + tensor var_273_cast_fp16 = mul(x = var_96_cast_fp16_0, y = var_272_cast_fp16)[name = string("op_273_cast_fp16")]; + tensor var_274_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_270_cast_fp16)[name = string("op_274_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_273_cast_fp16, y = var_274_cast_fp16)[name = string("key_3_cast_fp16")]; + tensor var_277_cast_fp16 = mul(x = var_104_cast_fp16_0, y = var_272_cast_fp16)[name = string("op_277_cast_fp16")]; + tensor var_278_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_270_cast_fp16)[name = string("op_278_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_277_cast_fp16, y = var_278_cast_fp16)[name = string("value_1_cast_fp16")]; + tensor var_282 = const()[name = string("op_282"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_1_cast_fp16 = reshape(shape = var_282, x = key_3_cast_fp16)[name = string("key_heads_1_cast_fp16")]; + tensor var_284 = const()[name = string("op_284"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_1_cast_fp16 = reshape(shape = var_284, x = value_1_cast_fp16)[name = string("value_heads_1_cast_fp16")]; + tensor var_287_begin_0 = const()[name = string("op_287_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_287_end_0 = const()[name = string("op_287_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_287_end_mask_0 = const()[name = string("op_287_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_287_cast_fp16 = slice_by_index(begin = var_287_begin_0, end = var_287_end_0, end_mask = var_287_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_287_cast_fp16")]; + tensor var_291_begin_0 = const()[name = string("op_291_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_291_end_0 = const()[name = string("op_291_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_291_end_mask_0 = const()[name = string("op_291_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_291_cast_fp16 = slice_by_index(begin = var_291_begin_0, end = var_291_end_0, end_mask = var_291_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_291_cast_fp16")]; + tensor var_303_begin_0 = const()[name = string("op_303_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_303_end_0 = const()[name = string("op_303_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_303_end_mask_0 = const()[name = string("op_303_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_303_cast_fp16 = slice_by_index(begin = var_303_begin_0, end = var_303_end_0, end_mask = var_303_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_303_cast_fp16")]; + tensor var_307_begin_0 = const()[name = string("op_307_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_307_end_0 = const()[name = string("op_307_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_307_end_mask_0 = const()[name = string("op_307_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_307_cast_fp16 = slice_by_index(begin = var_307_begin_0, end = var_307_end_0, end_mask = var_307_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_307_cast_fp16")]; + tensor var_319_begin_0 = const()[name = string("op_319_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_319_end_0 = const()[name = string("op_319_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_319_end_mask_0 = const()[name = string("op_319_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_319_cast_fp16 = slice_by_index(begin = var_319_begin_0, end = var_319_end_0, end_mask = var_319_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_319_cast_fp16")]; + tensor var_323_begin_0 = const()[name = string("op_323_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_323_end_0 = const()[name = string("op_323_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_323_end_mask_0 = const()[name = string("op_323_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_323_cast_fp16 = slice_by_index(begin = var_323_begin_0, end = var_323_end_0, end_mask = var_323_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_323_cast_fp16")]; + tensor var_335_begin_0 = const()[name = string("op_335_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_335_end_0 = const()[name = string("op_335_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_335_end_mask_0 = const()[name = string("op_335_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_335_cast_fp16 = slice_by_index(begin = var_335_begin_0, end = var_335_end_0, end_mask = var_335_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_335_cast_fp16")]; + tensor var_339_begin_0 = const()[name = string("op_339_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_339_end_0 = const()[name = string("op_339_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_339_end_mask_0 = const()[name = string("op_339_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_339_cast_fp16")]; + tensor var_351_begin_0 = const()[name = string("op_351_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_351_end_0 = const()[name = string("op_351_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_351_end_mask_0 = const()[name = string("op_351_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_351_cast_fp16 = slice_by_index(begin = var_351_begin_0, end = var_351_end_0, end_mask = var_351_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_351_cast_fp16")]; + tensor var_355_begin_0 = const()[name = string("op_355_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_355_end_0 = const()[name = string("op_355_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_355_end_mask_0 = const()[name = string("op_355_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_355_cast_fp16 = slice_by_index(begin = var_355_begin_0, end = var_355_end_0, end_mask = var_355_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_355_cast_fp16")]; + tensor var_367_begin_0 = const()[name = string("op_367_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_367_end_0 = const()[name = string("op_367_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_367_end_mask_0 = const()[name = string("op_367_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = var_367_end_0, end_mask = var_367_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_367_cast_fp16")]; + tensor var_371_begin_0 = const()[name = string("op_371_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_371_end_0 = const()[name = string("op_371_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_371_end_mask_0 = const()[name = string("op_371_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_371_cast_fp16 = slice_by_index(begin = var_371_begin_0, end = var_371_end_0, end_mask = var_371_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_371_cast_fp16")]; + tensor var_383_begin_0 = const()[name = string("op_383_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_383_end_0 = const()[name = string("op_383_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_383_end_mask_0 = const()[name = string("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_383_cast_fp16")]; + tensor var_387_begin_0 = const()[name = string("op_387_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_387_end_0 = const()[name = string("op_387_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_387_end_mask_0 = const()[name = string("op_387_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_387_cast_fp16 = slice_by_index(begin = var_387_begin_0, end = var_387_end_0, end_mask = var_387_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_387_cast_fp16")]; + tensor var_399_begin_0 = const()[name = string("op_399_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_399_end_0 = const()[name = string("op_399_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_399_end_mask_0 = const()[name = string("op_399_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_399_cast_fp16")]; + tensor var_403_begin_0 = const()[name = string("op_403_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_403_end_0 = const()[name = string("op_403_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_403_end_mask_0 = const()[name = string("op_403_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_403_cast_fp16 = slice_by_index(begin = var_403_begin_0, end = var_403_end_0, end_mask = var_403_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_403_cast_fp16")]; + bool key_heads_3_interleave_0 = const()[name = string("key_heads_3_interleave_0"), val = bool(false)]; + tensor key_heads_3_cast_fp16 = concat(axis = var_129, interleave = key_heads_3_interleave_0, values = (var_287_cast_fp16, var_287_cast_fp16, var_303_cast_fp16, var_303_cast_fp16, var_319_cast_fp16, var_319_cast_fp16, var_335_cast_fp16, var_335_cast_fp16, var_351_cast_fp16, var_351_cast_fp16, var_367_cast_fp16, var_367_cast_fp16, var_383_cast_fp16, var_383_cast_fp16, var_399_cast_fp16, var_399_cast_fp16))[name = string("key_heads_3_cast_fp16")]; + bool value_heads_3_interleave_0 = const()[name = string("value_heads_3_interleave_0"), val = bool(false)]; + tensor value_heads_3_cast_fp16 = concat(axis = var_129, interleave = value_heads_3_interleave_0, values = (var_291_cast_fp16, var_291_cast_fp16, var_307_cast_fp16, var_307_cast_fp16, var_323_cast_fp16, var_323_cast_fp16, var_339_cast_fp16, var_339_cast_fp16, var_355_cast_fp16, var_355_cast_fp16, var_371_cast_fp16, var_371_cast_fp16, var_387_cast_fp16, var_387_cast_fp16, var_403_cast_fp16, var_403_cast_fp16))[name = string("value_heads_3_cast_fp16")]; + fp16 var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_427_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_426_to_fp16)[name = string("op_427_cast_fp16")]; + bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; + bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_427_cast_fp16, y = key_heads_3_cast_fp16)[name = string("mh_w_1_cast_fp16")]; + tensor var_435_axes_0 = const()[name = string("op_435_axes_0"), val = tensor([1])]; + tensor var_435_cast_fp16 = expand_dims(axes = var_435_axes_0, x = key_padding_mask)[name = string("op_435_cast_fp16")]; + tensor var_436_axes_0 = const()[name = string("op_436_axes_0"), val = tensor([2])]; + tensor var_436_cast_fp16 = expand_dims(axes = var_436_axes_0, x = var_435_cast_fp16)[name = string("op_436_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_436_cast_fp16)[name = string("mh_w_3_cast_fp16")]; + tensor var_439_cast_fp16 = softmax(axis = var_111, x = mh_w_3_cast_fp16)[name = string("op_439_cast_fp16")]; + bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; + bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = value_heads_3_cast_fp16, y = var_439_cast_fp16)[name = string("attn_1_cast_fp16")]; + tensor var_444 = const()[name = string("op_444"), val = tensor([1, -1, 1, 1])]; + tensor input_1_cast_fp16 = reshape(shape = var_444, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; + string obj_11_pad_type_0 = const()[name = string("obj_11_pad_type_0"), val = string("valid")]; + tensor obj_11_strides_0 = const()[name = string("obj_11_strides_0"), val = tensor([1, 1])]; + tensor obj_11_pad_0 = const()[name = string("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_11_dilations_0 = const()[name = string("obj_11_dilations_0"), val = tensor([1, 1])]; + int32 obj_11_groups_0 = const()[name = string("obj_11_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6313536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8410752))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_11_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("obj_11_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_7_cast_fp16")]; + tensor inputs_sq_7_cast_fp16 = mul(x = inputs_7_cast_fp16, y = inputs_7_cast_fp16)[name = string("inputs_sq_7_cast_fp16")]; + tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([1])]; + bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = inputs_sq_7_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_462_to_fp16 = const()[name = string("op_462_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_463_cast_fp16 = add(x = variance_7_cast_fp16, y = var_462_to_fp16)[name = string("op_463_cast_fp16")]; + fp32 var_464_epsilon_0 = const()[name = string("op_464_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_464_cast_fp16 = rsqrt(epsilon = var_464_epsilon_0, x = var_463_cast_fp16)[name = string("op_464_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = mul(x = inputs_7_cast_fp16, y = var_464_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor w_7_to_fp16 = const()[name = string("w_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8411328)))]; + tensor input_3_cast_fp16 = mul(x = w_7_to_fp16, y = hidden_states_7_cast_fp16)[name = string("input_3_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8413440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11559232))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor var_478_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_478_cast_fp16")]; + string var_484_pad_type_0 = const()[name = string("op_484_pad_type_0"), val = string("valid")]; + tensor var_484_strides_0 = const()[name = string("op_484_strides_0"), val = tensor([1, 1])]; + tensor var_484_pad_0 = const()[name = string("op_484_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_484_dilations_0 = const()[name = string("op_484_dilations_0"), val = tensor([1, 1])]; + int32 var_484_groups_0 = const()[name = string("op_484_groups_0"), val = int32(1)]; + tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11559808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14705600))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_484_cast_fp16 = conv(dilations = var_484_dilations_0, groups = var_484_groups_0, pad = var_484_pad_0, pad_type = var_484_pad_type_0, strides = var_484_strides_0, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_484_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_478_cast_fp16, y = var_484_cast_fp16)[name = string("input_7_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14706176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17851968))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_9_cast_fp16")]; + int32 var_498 = const()[name = string("op_498"), val = int32(3)]; + int32 var_508 = const()[name = string("op_508"), val = int32(-2)]; + int32 var_516 = const()[name = string("op_516"), val = int32(1)]; + tensor inputs_sq_9_cast_fp16 = mul(x = inputs_9_cast_fp16, y = inputs_9_cast_fp16)[name = string("inputs_sq_9_cast_fp16")]; + tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([1])]; + bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = inputs_sq_9_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_529_cast_fp16 = add(x = variance_9_cast_fp16, y = var_528_to_fp16)[name = string("op_529_cast_fp16")]; + fp32 var_530_epsilon_0 = const()[name = string("op_530_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_530_cast_fp16 = rsqrt(epsilon = var_530_epsilon_0, x = var_529_cast_fp16)[name = string("op_530_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = mul(x = inputs_9_cast_fp16, y = var_530_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor w_9_to_fp16 = const()[name = string("w_9_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17852544)))]; + tensor obj_13_cast_fp16 = mul(x = w_9_to_fp16, y = hidden_states_11_cast_fp16)[name = string("obj_13_cast_fp16")]; + string query_7_pad_type_0 = const()[name = string("query_7_pad_type_0"), val = string("valid")]; + tensor query_7_strides_0 = const()[name = string("query_7_strides_0"), val = tensor([1, 1])]; + tensor query_7_pad_0 = const()[name = string("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_7_dilations_0 = const()[name = string("query_7_dilations_0"), val = tensor([1, 1])]; + int32 query_7_groups_0 = const()[name = string("query_7_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17854656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19951872))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_7_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("query_7_cast_fp16")]; + string current_key_5_pad_type_0 = const()[name = string("current_key_5_pad_type_0"), val = string("valid")]; + tensor current_key_5_strides_0 = const()[name = string("current_key_5_strides_0"), val = tensor([1, 1])]; + tensor current_key_5_pad_0 = const()[name = string("current_key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_5_dilations_0 = const()[name = string("current_key_5_dilations_0"), val = tensor([1, 1])]; + int32 current_key_5_groups_0 = const()[name = string("current_key_5_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19952448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21001088))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_5_cast_fp16 = conv(dilations = current_key_5_dilations_0, groups = current_key_5_groups_0, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = current_key_5_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("current_key_5_cast_fp16")]; + string current_value_3_pad_type_0 = const()[name = string("current_value_3_pad_type_0"), val = string("valid")]; + tensor current_value_3_strides_0 = const()[name = string("current_value_3_strides_0"), val = tensor([1, 1])]; + tensor current_value_3_pad_0 = const()[name = string("current_value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_3_dilations_0 = const()[name = string("current_value_3_dilations_0"), val = tensor([1, 1])]; + int32 current_value_3_groups_0 = const()[name = string("current_value_3_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21001664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22050304))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_3_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_3_dilations_0, groups = current_value_3_groups_0, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = current_value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("current_value_3_cast_fp16")]; + tensor var_567 = const()[name = string("op_567"), val = tensor([16, 128, 1, 1])]; + tensor inputs_11_cast_fp16 = reshape(shape = var_567, x = query_7_cast_fp16)[name = string("inputs_11_cast_fp16")]; + tensor inputs_sq_11_cast_fp16 = mul(x = inputs_11_cast_fp16, y = inputs_11_cast_fp16)[name = string("inputs_sq_11_cast_fp16")]; + tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([1])]; + bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = inputs_sq_11_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_574_cast_fp16 = add(x = variance_11_cast_fp16, y = var_573_to_fp16)[name = string("op_574_cast_fp16")]; + fp32 var_575_epsilon_0 = const()[name = string("op_575_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_575_cast_fp16 = rsqrt(epsilon = var_575_epsilon_0, x = var_574_cast_fp16)[name = string("op_575_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = mul(x = inputs_11_cast_fp16, y = var_575_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor w_11_to_fp16 = const()[name = string("w_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22050880)))]; + tensor query_normed_3_cast_fp16 = mul(x = w_11_to_fp16, y = hidden_states_13_cast_fp16)[name = string("query_normed_3_cast_fp16")]; + tensor var_583 = const()[name = string("op_583"), val = tensor([8, 128, 1, 1])]; + tensor inputs_13_cast_fp16 = reshape(shape = var_583, x = current_key_5_cast_fp16)[name = string("inputs_13_cast_fp16")]; + tensor inputs_sq_13_cast_fp16 = mul(x = inputs_13_cast_fp16, y = inputs_13_cast_fp16)[name = string("inputs_sq_13_cast_fp16")]; + tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([1])]; + bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = inputs_sq_13_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_589_to_fp16 = const()[name = string("op_589_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_590_cast_fp16 = add(x = variance_13_cast_fp16, y = var_589_to_fp16)[name = string("op_590_cast_fp16")]; + fp32 var_591_epsilon_0 = const()[name = string("op_591_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_591_cast_fp16 = rsqrt(epsilon = var_591_epsilon_0, x = var_590_cast_fp16)[name = string("op_591_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = mul(x = inputs_13_cast_fp16, y = var_591_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor w_13_to_fp16 = const()[name = string("w_13_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22051200)))]; + tensor current_key_normed_3_cast_fp16 = mul(x = w_13_to_fp16, y = hidden_states_15_cast_fp16)[name = string("current_key_normed_3_cast_fp16")]; + tensor var_609 = const()[name = string("op_609"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_609, x = query_normed_3_cast_fp16)[name = string("mh_q_7_cast_fp16")]; + tensor var_611 = const()[name = string("op_611"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_5_cast_fp16 = reshape(shape = var_611, x = current_key_normed_3_cast_fp16)[name = string("mh_k_5_cast_fp16")]; + tensor var_615_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_615_cast_fp16")]; + tensor var_620_begin_0 = const()[name = string("op_620_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_620_end_0 = const()[name = string("op_620_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_620_end_mask_0 = const()[name = string("op_620_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_620_cast_fp16 = slice_by_index(begin = var_620_begin_0, end = var_620_end_0, end_mask = var_620_end_mask_0, x = mh_q_7_cast_fp16)[name = string("op_620_cast_fp16")]; + tensor var_626_begin_0 = const()[name = string("op_626_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_626_end_0 = const()[name = string("op_626_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_626_end_mask_0 = const()[name = string("op_626_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = var_626_end_0, end_mask = var_626_end_mask_0, x = mh_q_7_cast_fp16)[name = string("op_626_cast_fp16")]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_628_cast_fp16")]; + bool var_630_interleave_0 = const()[name = string("op_630_interleave_0"), val = bool(false)]; + tensor var_630_cast_fp16 = concat(axis = var_508, interleave = var_630_interleave_0, values = (var_628_cast_fp16, var_620_cast_fp16))[name = string("op_630_cast_fp16")]; + tensor var_631_cast_fp16 = mul(x = var_630_cast_fp16, y = sin_1_cast_fp16)[name = string("op_631_cast_fp16")]; + tensor mh_q_9_cast_fp16 = add(x = var_615_cast_fp16, y = var_631_cast_fp16)[name = string("mh_q_9_cast_fp16")]; + tensor var_633_cast_fp16 = mul(x = mh_k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_633_cast_fp16")]; + tensor var_638_begin_0 = const()[name = string("op_638_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_end_0 = const()[name = string("op_638_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_638_end_mask_0 = const()[name = string("op_638_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_638_cast_fp16 = slice_by_index(begin = var_638_begin_0, end = var_638_end_0, end_mask = var_638_end_mask_0, x = mh_k_5_cast_fp16)[name = string("op_638_cast_fp16")]; + tensor var_644_begin_0 = const()[name = string("op_644_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_644_end_0 = const()[name = string("op_644_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_644_end_mask_0 = const()[name = string("op_644_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_644_cast_fp16 = slice_by_index(begin = var_644_begin_0, end = var_644_end_0, end_mask = var_644_end_mask_0, x = mh_k_5_cast_fp16)[name = string("op_644_cast_fp16")]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_646_cast_fp16 = mul(x = var_644_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_646_cast_fp16")]; + bool var_648_interleave_0 = const()[name = string("op_648_interleave_0"), val = bool(false)]; + tensor var_648_cast_fp16 = concat(axis = var_508, interleave = var_648_interleave_0, values = (var_646_cast_fp16, var_638_cast_fp16))[name = string("op_648_cast_fp16")]; + tensor var_649_cast_fp16 = mul(x = var_648_cast_fp16, y = sin_1_cast_fp16)[name = string("op_649_cast_fp16")]; + tensor mh_k_7_cast_fp16 = add(x = var_633_cast_fp16, y = var_649_cast_fp16)[name = string("mh_k_7_cast_fp16")]; + tensor var_653 = const()[name = string("op_653"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_7_cast_fp16 = reshape(shape = var_653, x = mh_k_7_cast_fp16)[name = string("current_key_7_cast_fp16")]; + tensor var_660_cast_fp16 = mul(x = var_96_cast_fp16_1, y = var_272_cast_fp16)[name = string("op_660_cast_fp16")]; + tensor var_661_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_270_cast_fp16)[name = string("op_661_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_660_cast_fp16, y = var_661_cast_fp16)[name = string("key_9_cast_fp16")]; + tensor var_664_cast_fp16 = mul(x = var_104_cast_fp16_1, y = var_272_cast_fp16)[name = string("op_664_cast_fp16")]; + tensor var_665_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_270_cast_fp16)[name = string("op_665_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_664_cast_fp16, y = var_665_cast_fp16)[name = string("value_5_cast_fp16")]; + tensor var_669 = const()[name = string("op_669"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_5_cast_fp16 = reshape(shape = var_669, x = key_9_cast_fp16)[name = string("key_heads_5_cast_fp16")]; + tensor var_671 = const()[name = string("op_671"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_5_cast_fp16 = reshape(shape = var_671, x = value_5_cast_fp16)[name = string("value_heads_5_cast_fp16")]; + tensor var_674_begin_0 = const()[name = string("op_674_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_674_end_0 = const()[name = string("op_674_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_674_end_mask_0 = const()[name = string("op_674_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_674_cast_fp16 = slice_by_index(begin = var_674_begin_0, end = var_674_end_0, end_mask = var_674_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_674_cast_fp16")]; + tensor var_678_begin_0 = const()[name = string("op_678_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_678_end_0 = const()[name = string("op_678_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_678_end_mask_0 = const()[name = string("op_678_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_678_cast_fp16 = slice_by_index(begin = var_678_begin_0, end = var_678_end_0, end_mask = var_678_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_678_cast_fp16")]; + tensor var_690_begin_0 = const()[name = string("op_690_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_690_end_0 = const()[name = string("op_690_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_690_end_mask_0 = const()[name = string("op_690_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_690_cast_fp16 = slice_by_index(begin = var_690_begin_0, end = var_690_end_0, end_mask = var_690_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_690_cast_fp16")]; + tensor var_694_begin_0 = const()[name = string("op_694_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_694_end_0 = const()[name = string("op_694_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_694_end_mask_0 = const()[name = string("op_694_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_694_cast_fp16 = slice_by_index(begin = var_694_begin_0, end = var_694_end_0, end_mask = var_694_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_694_cast_fp16")]; + tensor var_706_begin_0 = const()[name = string("op_706_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_706_end_0 = const()[name = string("op_706_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_706_end_mask_0 = const()[name = string("op_706_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_706_cast_fp16 = slice_by_index(begin = var_706_begin_0, end = var_706_end_0, end_mask = var_706_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_706_cast_fp16")]; + tensor var_710_begin_0 = const()[name = string("op_710_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_710_end_0 = const()[name = string("op_710_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_710_end_mask_0 = const()[name = string("op_710_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_710_cast_fp16 = slice_by_index(begin = var_710_begin_0, end = var_710_end_0, end_mask = var_710_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_710_cast_fp16")]; + tensor var_722_begin_0 = const()[name = string("op_722_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_722_end_0 = const()[name = string("op_722_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_722_end_mask_0 = const()[name = string("op_722_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_722_cast_fp16 = slice_by_index(begin = var_722_begin_0, end = var_722_end_0, end_mask = var_722_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_722_cast_fp16")]; + tensor var_726_begin_0 = const()[name = string("op_726_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_726_end_0 = const()[name = string("op_726_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_726_end_mask_0 = const()[name = string("op_726_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_726_cast_fp16 = slice_by_index(begin = var_726_begin_0, end = var_726_end_0, end_mask = var_726_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_726_cast_fp16")]; + tensor var_738_begin_0 = const()[name = string("op_738_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_738_end_0 = const()[name = string("op_738_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_738_end_mask_0 = const()[name = string("op_738_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_738_cast_fp16 = slice_by_index(begin = var_738_begin_0, end = var_738_end_0, end_mask = var_738_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_738_cast_fp16")]; + tensor var_742_begin_0 = const()[name = string("op_742_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_742_end_0 = const()[name = string("op_742_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_742_end_mask_0 = const()[name = string("op_742_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_742_cast_fp16 = slice_by_index(begin = var_742_begin_0, end = var_742_end_0, end_mask = var_742_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_742_cast_fp16")]; + tensor var_754_begin_0 = const()[name = string("op_754_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_754_end_0 = const()[name = string("op_754_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_754_end_mask_0 = const()[name = string("op_754_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_754_cast_fp16 = slice_by_index(begin = var_754_begin_0, end = var_754_end_0, end_mask = var_754_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_754_cast_fp16")]; + tensor var_758_begin_0 = const()[name = string("op_758_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_758_end_0 = const()[name = string("op_758_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_758_end_mask_0 = const()[name = string("op_758_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_758_cast_fp16 = slice_by_index(begin = var_758_begin_0, end = var_758_end_0, end_mask = var_758_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_758_cast_fp16")]; + tensor var_770_begin_0 = const()[name = string("op_770_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_770_end_0 = const()[name = string("op_770_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_770_end_mask_0 = const()[name = string("op_770_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_770_cast_fp16 = slice_by_index(begin = var_770_begin_0, end = var_770_end_0, end_mask = var_770_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_770_cast_fp16")]; + tensor var_774_begin_0 = const()[name = string("op_774_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_774_end_0 = const()[name = string("op_774_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_774_end_mask_0 = const()[name = string("op_774_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_774_cast_fp16 = slice_by_index(begin = var_774_begin_0, end = var_774_end_0, end_mask = var_774_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor var_786_begin_0 = const()[name = string("op_786_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_786_end_0 = const()[name = string("op_786_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_786_end_mask_0 = const()[name = string("op_786_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_786_cast_fp16 = slice_by_index(begin = var_786_begin_0, end = var_786_end_0, end_mask = var_786_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_786_cast_fp16")]; + tensor var_790_begin_0 = const()[name = string("op_790_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_790_end_0 = const()[name = string("op_790_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_790_end_mask_0 = const()[name = string("op_790_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_790_cast_fp16 = slice_by_index(begin = var_790_begin_0, end = var_790_end_0, end_mask = var_790_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_790_cast_fp16")]; + bool key_heads_7_interleave_0 = const()[name = string("key_heads_7_interleave_0"), val = bool(false)]; + tensor key_heads_7_cast_fp16 = concat(axis = var_516, interleave = key_heads_7_interleave_0, values = (var_674_cast_fp16, var_674_cast_fp16, var_690_cast_fp16, var_690_cast_fp16, var_706_cast_fp16, var_706_cast_fp16, var_722_cast_fp16, var_722_cast_fp16, var_738_cast_fp16, var_738_cast_fp16, var_754_cast_fp16, var_754_cast_fp16, var_770_cast_fp16, var_770_cast_fp16, var_786_cast_fp16, var_786_cast_fp16))[name = string("key_heads_7_cast_fp16")]; + bool value_heads_7_interleave_0 = const()[name = string("value_heads_7_interleave_0"), val = bool(false)]; + tensor value_heads_7_cast_fp16 = concat(axis = var_516, interleave = value_heads_7_interleave_0, values = (var_678_cast_fp16, var_678_cast_fp16, var_694_cast_fp16, var_694_cast_fp16, var_710_cast_fp16, var_710_cast_fp16, var_726_cast_fp16, var_726_cast_fp16, var_742_cast_fp16, var_742_cast_fp16, var_758_cast_fp16, var_758_cast_fp16, var_774_cast_fp16, var_774_cast_fp16, var_790_cast_fp16, var_790_cast_fp16))[name = string("value_heads_7_cast_fp16")]; + fp16 var_813_to_fp16 = const()[name = string("op_813_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_814_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_813_to_fp16)[name = string("op_814_cast_fp16")]; + bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; + bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_814_cast_fp16, y = key_heads_7_cast_fp16)[name = string("mh_w_5_cast_fp16")]; + tensor mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_436_cast_fp16)[name = string("mh_w_7_cast_fp16")]; + tensor var_826_cast_fp16 = softmax(axis = var_498, x = mh_w_7_cast_fp16)[name = string("op_826_cast_fp16")]; + bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; + bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = value_heads_7_cast_fp16, y = var_826_cast_fp16)[name = string("attn_3_cast_fp16")]; + tensor var_831 = const()[name = string("op_831"), val = tensor([1, -1, 1, 1])]; + tensor input_9_cast_fp16 = reshape(shape = var_831, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")]; + string obj_19_pad_type_0 = const()[name = string("obj_19_pad_type_0"), val = string("valid")]; + tensor obj_19_strides_0 = const()[name = string("obj_19_strides_0"), val = tensor([1, 1])]; + tensor obj_19_pad_0 = const()[name = string("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_19_dilations_0 = const()[name = string("obj_19_dilations_0"), val = tensor([1, 1])]; + int32 obj_19_groups_0 = const()[name = string("obj_19_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22051520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24148736))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_19_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("obj_19_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_15_cast_fp16")]; + tensor inputs_sq_15_cast_fp16 = mul(x = inputs_15_cast_fp16, y = inputs_15_cast_fp16)[name = string("inputs_sq_15_cast_fp16")]; + tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([1])]; + bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = inputs_sq_15_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_850_cast_fp16 = add(x = variance_15_cast_fp16, y = var_849_to_fp16)[name = string("op_850_cast_fp16")]; + fp32 var_851_epsilon_0 = const()[name = string("op_851_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_851_cast_fp16 = rsqrt(epsilon = var_851_epsilon_0, x = var_850_cast_fp16)[name = string("op_851_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = mul(x = inputs_15_cast_fp16, y = var_851_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor w_15_to_fp16 = const()[name = string("w_15_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24149312)))]; + tensor input_11_cast_fp16 = mul(x = w_15_to_fp16, y = hidden_states_17_cast_fp16)[name = string("input_11_cast_fp16")]; + string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; + tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; + tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; + int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; + tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24151424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27297216))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")]; + tensor var_865_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_865_cast_fp16")]; + string var_871_pad_type_0 = const()[name = string("op_871_pad_type_0"), val = string("valid")]; + tensor var_871_strides_0 = const()[name = string("op_871_strides_0"), val = tensor([1, 1])]; + tensor var_871_pad_0 = const()[name = string("op_871_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_871_dilations_0 = const()[name = string("op_871_dilations_0"), val = tensor([1, 1])]; + int32 var_871_groups_0 = const()[name = string("op_871_groups_0"), val = int32(1)]; + tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27297792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30443584))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_871_cast_fp16 = conv(dilations = var_871_dilations_0, groups = var_871_groups_0, pad = var_871_pad_0, pad_type = var_871_pad_type_0, strides = var_871_strides_0, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_871_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_865_cast_fp16, y = var_871_cast_fp16)[name = string("input_15_cast_fp16")]; + string hidden_states_19_pad_type_0 = const()[name = string("hidden_states_19_pad_type_0"), val = string("valid")]; + tensor hidden_states_19_strides_0 = const()[name = string("hidden_states_19_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_0 = const()[name = string("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_19_dilations_0 = const()[name = string("hidden_states_19_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_19_groups_0 = const()[name = string("hidden_states_19_groups_0"), val = int32(1)]; + tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30444160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33589952))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_19_cast_fp16 = conv(dilations = hidden_states_19_dilations_0, groups = hidden_states_19_groups_0, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = hidden_states_19_strides_0, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_17_cast_fp16")]; + int32 var_885 = const()[name = string("op_885"), val = int32(3)]; + int32 var_895 = const()[name = string("op_895"), val = int32(-2)]; + int32 var_903 = const()[name = string("op_903"), val = int32(1)]; + tensor inputs_sq_17_cast_fp16 = mul(x = inputs_17_cast_fp16, y = inputs_17_cast_fp16)[name = string("inputs_sq_17_cast_fp16")]; + tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([1])]; + bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = inputs_sq_17_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_916_cast_fp16 = add(x = variance_17_cast_fp16, y = var_915_to_fp16)[name = string("op_916_cast_fp16")]; + fp32 var_917_epsilon_0 = const()[name = string("op_917_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_917_cast_fp16 = rsqrt(epsilon = var_917_epsilon_0, x = var_916_cast_fp16)[name = string("op_917_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = mul(x = inputs_17_cast_fp16, y = var_917_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor w_17_to_fp16 = const()[name = string("w_17_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33590528)))]; + tensor obj_21_cast_fp16 = mul(x = w_17_to_fp16, y = hidden_states_21_cast_fp16)[name = string("obj_21_cast_fp16")]; + string query_13_pad_type_0 = const()[name = string("query_13_pad_type_0"), val = string("valid")]; + tensor query_13_strides_0 = const()[name = string("query_13_strides_0"), val = tensor([1, 1])]; + tensor query_13_pad_0 = const()[name = string("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_13_dilations_0 = const()[name = string("query_13_dilations_0"), val = tensor([1, 1])]; + int32 query_13_groups_0 = const()[name = string("query_13_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33592640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35689856))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_13_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("query_13_cast_fp16")]; + string current_key_9_pad_type_0 = const()[name = string("current_key_9_pad_type_0"), val = string("valid")]; + tensor current_key_9_strides_0 = const()[name = string("current_key_9_strides_0"), val = tensor([1, 1])]; + tensor current_key_9_pad_0 = const()[name = string("current_key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_9_dilations_0 = const()[name = string("current_key_9_dilations_0"), val = tensor([1, 1])]; + int32 current_key_9_groups_0 = const()[name = string("current_key_9_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35690432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36739072))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_9_cast_fp16 = conv(dilations = current_key_9_dilations_0, groups = current_key_9_groups_0, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = current_key_9_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("current_key_9_cast_fp16")]; + string current_value_5_pad_type_0 = const()[name = string("current_value_5_pad_type_0"), val = string("valid")]; + tensor current_value_5_strides_0 = const()[name = string("current_value_5_strides_0"), val = tensor([1, 1])]; + tensor current_value_5_pad_0 = const()[name = string("current_value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_5_dilations_0 = const()[name = string("current_value_5_dilations_0"), val = tensor([1, 1])]; + int32 current_value_5_groups_0 = const()[name = string("current_value_5_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36739648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37788288))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_5_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_5_dilations_0, groups = current_value_5_groups_0, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = current_value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("current_value_5_cast_fp16")]; + tensor var_954 = const()[name = string("op_954"), val = tensor([16, 128, 1, 1])]; + tensor inputs_19_cast_fp16 = reshape(shape = var_954, x = query_13_cast_fp16)[name = string("inputs_19_cast_fp16")]; + tensor inputs_sq_19_cast_fp16 = mul(x = inputs_19_cast_fp16, y = inputs_19_cast_fp16)[name = string("inputs_sq_19_cast_fp16")]; + tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([1])]; + bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = inputs_sq_19_cast_fp16)[name = string("variance_19_cast_fp16")]; + fp16 var_960_to_fp16 = const()[name = string("op_960_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_961_cast_fp16 = add(x = variance_19_cast_fp16, y = var_960_to_fp16)[name = string("op_961_cast_fp16")]; + fp32 var_962_epsilon_0 = const()[name = string("op_962_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_962_cast_fp16 = rsqrt(epsilon = var_962_epsilon_0, x = var_961_cast_fp16)[name = string("op_962_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = inputs_19_cast_fp16, y = var_962_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor w_19_to_fp16 = const()[name = string("w_19_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37788864)))]; + tensor query_normed_5_cast_fp16 = mul(x = w_19_to_fp16, y = hidden_states_23_cast_fp16)[name = string("query_normed_5_cast_fp16")]; + tensor var_970 = const()[name = string("op_970"), val = tensor([8, 128, 1, 1])]; + tensor inputs_21_cast_fp16 = reshape(shape = var_970, x = current_key_9_cast_fp16)[name = string("inputs_21_cast_fp16")]; + tensor inputs_sq_21_cast_fp16 = mul(x = inputs_21_cast_fp16, y = inputs_21_cast_fp16)[name = string("inputs_sq_21_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = inputs_sq_21_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_976_to_fp16 = const()[name = string("op_976_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_977_cast_fp16 = add(x = variance_21_cast_fp16, y = var_976_to_fp16)[name = string("op_977_cast_fp16")]; + fp32 var_978_epsilon_0 = const()[name = string("op_978_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_978_cast_fp16 = rsqrt(epsilon = var_978_epsilon_0, x = var_977_cast_fp16)[name = string("op_978_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = mul(x = inputs_21_cast_fp16, y = var_978_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; + tensor w_21_to_fp16 = const()[name = string("w_21_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37789184)))]; + tensor current_key_normed_5_cast_fp16 = mul(x = w_21_to_fp16, y = hidden_states_25_cast_fp16)[name = string("current_key_normed_5_cast_fp16")]; + tensor var_996 = const()[name = string("op_996"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_996, x = query_normed_5_cast_fp16)[name = string("mh_q_13_cast_fp16")]; + tensor var_998 = const()[name = string("op_998"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_9_cast_fp16 = reshape(shape = var_998, x = current_key_normed_5_cast_fp16)[name = string("mh_k_9_cast_fp16")]; + tensor var_1002_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1002_cast_fp16")]; + tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1007_end_0 = const()[name = string("op_1007_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = mh_q_13_cast_fp16)[name = string("op_1007_cast_fp16")]; + tensor var_1013_begin_0 = const()[name = string("op_1013_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1013_end_0 = const()[name = string("op_1013_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1013_end_mask_0 = const()[name = string("op_1013_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1013_cast_fp16 = slice_by_index(begin = var_1013_begin_0, end = var_1013_end_0, end_mask = var_1013_end_mask_0, x = mh_q_13_cast_fp16)[name = string("op_1013_cast_fp16")]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1015_cast_fp16 = mul(x = var_1013_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1015_cast_fp16")]; + bool var_1017_interleave_0 = const()[name = string("op_1017_interleave_0"), val = bool(false)]; + tensor var_1017_cast_fp16 = concat(axis = var_895, interleave = var_1017_interleave_0, values = (var_1015_cast_fp16, var_1007_cast_fp16))[name = string("op_1017_cast_fp16")]; + tensor var_1018_cast_fp16 = mul(x = var_1017_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1018_cast_fp16")]; + tensor mh_q_15_cast_fp16 = add(x = var_1002_cast_fp16, y = var_1018_cast_fp16)[name = string("mh_q_15_cast_fp16")]; + tensor var_1020_cast_fp16 = mul(x = mh_k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1020_cast_fp16")]; + tensor var_1025_begin_0 = const()[name = string("op_1025_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1025_end_0 = const()[name = string("op_1025_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1025_end_mask_0 = const()[name = string("op_1025_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1025_cast_fp16 = slice_by_index(begin = var_1025_begin_0, end = var_1025_end_0, end_mask = var_1025_end_mask_0, x = mh_k_9_cast_fp16)[name = string("op_1025_cast_fp16")]; + tensor var_1031_begin_0 = const()[name = string("op_1031_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1031_end_0 = const()[name = string("op_1031_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1031_end_mask_0 = const()[name = string("op_1031_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1031_cast_fp16 = slice_by_index(begin = var_1031_begin_0, end = var_1031_end_0, end_mask = var_1031_end_mask_0, x = mh_k_9_cast_fp16)[name = string("op_1031_cast_fp16")]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1033_cast_fp16 = mul(x = var_1031_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_1033_cast_fp16")]; + bool var_1035_interleave_0 = const()[name = string("op_1035_interleave_0"), val = bool(false)]; + tensor var_1035_cast_fp16 = concat(axis = var_895, interleave = var_1035_interleave_0, values = (var_1033_cast_fp16, var_1025_cast_fp16))[name = string("op_1035_cast_fp16")]; + tensor var_1036_cast_fp16 = mul(x = var_1035_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1036_cast_fp16")]; + tensor mh_k_11_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1036_cast_fp16)[name = string("mh_k_11_cast_fp16")]; + tensor var_1040 = const()[name = string("op_1040"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_11_cast_fp16 = reshape(shape = var_1040, x = mh_k_11_cast_fp16)[name = string("current_key_11_cast_fp16")]; + tensor var_1047_cast_fp16 = mul(x = var_96_cast_fp16_2, y = var_272_cast_fp16)[name = string("op_1047_cast_fp16")]; + tensor var_1048_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_270_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1047_cast_fp16, y = var_1048_cast_fp16)[name = string("key_15_cast_fp16")]; + tensor var_1051_cast_fp16 = mul(x = var_104_cast_fp16_2, y = var_272_cast_fp16)[name = string("op_1051_cast_fp16")]; + tensor var_1052_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_270_cast_fp16)[name = string("op_1052_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1051_cast_fp16, y = var_1052_cast_fp16)[name = string("value_9_cast_fp16")]; + tensor var_1056 = const()[name = string("op_1056"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_9_cast_fp16 = reshape(shape = var_1056, x = key_15_cast_fp16)[name = string("key_heads_9_cast_fp16")]; + tensor var_1058 = const()[name = string("op_1058"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_9_cast_fp16 = reshape(shape = var_1058, x = value_9_cast_fp16)[name = string("value_heads_9_cast_fp16")]; + tensor var_1061_begin_0 = const()[name = string("op_1061_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1061_end_0 = const()[name = string("op_1061_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1061_end_mask_0 = const()[name = string("op_1061_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1061_cast_fp16 = slice_by_index(begin = var_1061_begin_0, end = var_1061_end_0, end_mask = var_1061_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1061_cast_fp16")]; + tensor var_1065_begin_0 = const()[name = string("op_1065_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1065_end_0 = const()[name = string("op_1065_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1065_end_mask_0 = const()[name = string("op_1065_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1065_cast_fp16 = slice_by_index(begin = var_1065_begin_0, end = var_1065_end_0, end_mask = var_1065_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1065_cast_fp16")]; + tensor var_1077_begin_0 = const()[name = string("op_1077_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1077_end_0 = const()[name = string("op_1077_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1077_end_mask_0 = const()[name = string("op_1077_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1077_cast_fp16 = slice_by_index(begin = var_1077_begin_0, end = var_1077_end_0, end_mask = var_1077_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1077_cast_fp16")]; + tensor var_1081_begin_0 = const()[name = string("op_1081_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1081_end_0 = const()[name = string("op_1081_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1081_end_mask_0 = const()[name = string("op_1081_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1081_cast_fp16 = slice_by_index(begin = var_1081_begin_0, end = var_1081_end_0, end_mask = var_1081_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1081_cast_fp16")]; + tensor var_1093_begin_0 = const()[name = string("op_1093_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1093_end_0 = const()[name = string("op_1093_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1093_end_mask_0 = const()[name = string("op_1093_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1093_cast_fp16 = slice_by_index(begin = var_1093_begin_0, end = var_1093_end_0, end_mask = var_1093_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1093_cast_fp16")]; + tensor var_1097_begin_0 = const()[name = string("op_1097_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1097_end_0 = const()[name = string("op_1097_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1097_end_mask_0 = const()[name = string("op_1097_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1097_cast_fp16 = slice_by_index(begin = var_1097_begin_0, end = var_1097_end_0, end_mask = var_1097_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1097_cast_fp16")]; + tensor var_1109_begin_0 = const()[name = string("op_1109_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1109_end_0 = const()[name = string("op_1109_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1109_end_mask_0 = const()[name = string("op_1109_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1109_cast_fp16 = slice_by_index(begin = var_1109_begin_0, end = var_1109_end_0, end_mask = var_1109_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1109_cast_fp16")]; + tensor var_1113_begin_0 = const()[name = string("op_1113_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1113_end_0 = const()[name = string("op_1113_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1113_end_mask_0 = const()[name = string("op_1113_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1113_cast_fp16 = slice_by_index(begin = var_1113_begin_0, end = var_1113_end_0, end_mask = var_1113_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1113_cast_fp16")]; + tensor var_1125_begin_0 = const()[name = string("op_1125_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1125_end_0 = const()[name = string("op_1125_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1125_end_mask_0 = const()[name = string("op_1125_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1125_cast_fp16 = slice_by_index(begin = var_1125_begin_0, end = var_1125_end_0, end_mask = var_1125_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1125_cast_fp16")]; + tensor var_1129_begin_0 = const()[name = string("op_1129_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1129_end_0 = const()[name = string("op_1129_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1129_end_mask_0 = const()[name = string("op_1129_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1129_cast_fp16 = slice_by_index(begin = var_1129_begin_0, end = var_1129_end_0, end_mask = var_1129_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1129_cast_fp16")]; + tensor var_1141_begin_0 = const()[name = string("op_1141_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1141_end_0 = const()[name = string("op_1141_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1141_end_mask_0 = const()[name = string("op_1141_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1141_cast_fp16 = slice_by_index(begin = var_1141_begin_0, end = var_1141_end_0, end_mask = var_1141_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1141_cast_fp16")]; + tensor var_1145_begin_0 = const()[name = string("op_1145_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1145_end_0 = const()[name = string("op_1145_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1145_end_mask_0 = const()[name = string("op_1145_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1145_cast_fp16 = slice_by_index(begin = var_1145_begin_0, end = var_1145_end_0, end_mask = var_1145_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1145_cast_fp16")]; + tensor var_1157_begin_0 = const()[name = string("op_1157_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1157_end_0 = const()[name = string("op_1157_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1157_end_mask_0 = const()[name = string("op_1157_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1157_cast_fp16 = slice_by_index(begin = var_1157_begin_0, end = var_1157_end_0, end_mask = var_1157_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1157_cast_fp16")]; + tensor var_1161_begin_0 = const()[name = string("op_1161_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1161_end_0 = const()[name = string("op_1161_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1161_end_mask_0 = const()[name = string("op_1161_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1161_cast_fp16 = slice_by_index(begin = var_1161_begin_0, end = var_1161_end_0, end_mask = var_1161_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1161_cast_fp16")]; + tensor var_1173_begin_0 = const()[name = string("op_1173_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1173_end_0 = const()[name = string("op_1173_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1173_end_mask_0 = const()[name = string("op_1173_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1173_cast_fp16 = slice_by_index(begin = var_1173_begin_0, end = var_1173_end_0, end_mask = var_1173_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1173_cast_fp16")]; + tensor var_1177_begin_0 = const()[name = string("op_1177_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1177_end_0 = const()[name = string("op_1177_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1177_end_mask_0 = const()[name = string("op_1177_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1177_cast_fp16 = slice_by_index(begin = var_1177_begin_0, end = var_1177_end_0, end_mask = var_1177_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1177_cast_fp16")]; + bool key_heads_11_interleave_0 = const()[name = string("key_heads_11_interleave_0"), val = bool(false)]; + tensor key_heads_11_cast_fp16 = concat(axis = var_903, interleave = key_heads_11_interleave_0, values = (var_1061_cast_fp16, var_1061_cast_fp16, var_1077_cast_fp16, var_1077_cast_fp16, var_1093_cast_fp16, var_1093_cast_fp16, var_1109_cast_fp16, var_1109_cast_fp16, var_1125_cast_fp16, var_1125_cast_fp16, var_1141_cast_fp16, var_1141_cast_fp16, var_1157_cast_fp16, var_1157_cast_fp16, var_1173_cast_fp16, var_1173_cast_fp16))[name = string("key_heads_11_cast_fp16")]; + bool value_heads_11_interleave_0 = const()[name = string("value_heads_11_interleave_0"), val = bool(false)]; + tensor value_heads_11_cast_fp16 = concat(axis = var_903, interleave = value_heads_11_interleave_0, values = (var_1065_cast_fp16, var_1065_cast_fp16, var_1081_cast_fp16, var_1081_cast_fp16, var_1097_cast_fp16, var_1097_cast_fp16, var_1113_cast_fp16, var_1113_cast_fp16, var_1129_cast_fp16, var_1129_cast_fp16, var_1145_cast_fp16, var_1145_cast_fp16, var_1161_cast_fp16, var_1161_cast_fp16, var_1177_cast_fp16, var_1177_cast_fp16))[name = string("value_heads_11_cast_fp16")]; + fp16 var_1200_to_fp16 = const()[name = string("op_1200_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1201_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1200_to_fp16)[name = string("op_1201_cast_fp16")]; + bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; + bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1201_cast_fp16, y = key_heads_11_cast_fp16)[name = string("mh_w_9_cast_fp16")]; + tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_436_cast_fp16)[name = string("mh_w_11_cast_fp16")]; + tensor var_1213_cast_fp16 = softmax(axis = var_885, x = mh_w_11_cast_fp16)[name = string("op_1213_cast_fp16")]; + bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; + bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = value_heads_11_cast_fp16, y = var_1213_cast_fp16)[name = string("attn_5_cast_fp16")]; + tensor var_1218 = const()[name = string("op_1218"), val = tensor([1, -1, 1, 1])]; + tensor input_17_cast_fp16 = reshape(shape = var_1218, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")]; + string obj_27_pad_type_0 = const()[name = string("obj_27_pad_type_0"), val = string("valid")]; + tensor obj_27_strides_0 = const()[name = string("obj_27_strides_0"), val = tensor([1, 1])]; + tensor obj_27_pad_0 = const()[name = string("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_27_dilations_0 = const()[name = string("obj_27_dilations_0"), val = tensor([1, 1])]; + int32 obj_27_groups_0 = const()[name = string("obj_27_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37789504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39886720))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_27_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_27_dilations_0, groups = obj_27_groups_0, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = obj_27_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("obj_27_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_23_cast_fp16")]; + tensor inputs_sq_23_cast_fp16 = mul(x = inputs_23_cast_fp16, y = inputs_23_cast_fp16)[name = string("inputs_sq_23_cast_fp16")]; + tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([1])]; + bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = inputs_sq_23_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1236_to_fp16 = const()[name = string("op_1236_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1237_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1236_to_fp16)[name = string("op_1237_cast_fp16")]; + fp32 var_1238_epsilon_0 = const()[name = string("op_1238_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1238_cast_fp16 = rsqrt(epsilon = var_1238_epsilon_0, x = var_1237_cast_fp16)[name = string("op_1238_cast_fp16")]; + tensor hidden_states_27_cast_fp16 = mul(x = inputs_23_cast_fp16, y = var_1238_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor w_23_to_fp16 = const()[name = string("w_23_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39887296)))]; + tensor input_19_cast_fp16 = mul(x = w_23_to_fp16, y = hidden_states_27_cast_fp16)[name = string("input_19_cast_fp16")]; + string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; + tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; + tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; + int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; + tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39889408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43035200))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_1252_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_1252_cast_fp16")]; + string var_1258_pad_type_0 = const()[name = string("op_1258_pad_type_0"), val = string("valid")]; + tensor var_1258_strides_0 = const()[name = string("op_1258_strides_0"), val = tensor([1, 1])]; + tensor var_1258_pad_0 = const()[name = string("op_1258_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1258_dilations_0 = const()[name = string("op_1258_dilations_0"), val = tensor([1, 1])]; + int32 var_1258_groups_0 = const()[name = string("op_1258_groups_0"), val = int32(1)]; + tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43035776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46181568))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_1258_cast_fp16 = conv(dilations = var_1258_dilations_0, groups = var_1258_groups_0, pad = var_1258_pad_0, pad_type = var_1258_pad_type_0, strides = var_1258_strides_0, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_1258_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1258_cast_fp16)[name = string("input_23_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46182144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49327936))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("inputs_25_cast_fp16")]; + int32 var_1272 = const()[name = string("op_1272"), val = int32(3)]; + int32 var_1282 = const()[name = string("op_1282"), val = int32(-2)]; + int32 var_1290 = const()[name = string("op_1290"), val = int32(1)]; + tensor inputs_sq_25_cast_fp16 = mul(x = inputs_25_cast_fp16, y = inputs_25_cast_fp16)[name = string("inputs_sq_25_cast_fp16")]; + tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([1])]; + bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = inputs_sq_25_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1303_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1302_to_fp16)[name = string("op_1303_cast_fp16")]; + fp32 var_1304_epsilon_0 = const()[name = string("op_1304_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1304_cast_fp16 = rsqrt(epsilon = var_1304_epsilon_0, x = var_1303_cast_fp16)[name = string("op_1304_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = mul(x = inputs_25_cast_fp16, y = var_1304_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor w_25_to_fp16 = const()[name = string("w_25_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49328512)))]; + tensor obj_29_cast_fp16 = mul(x = w_25_to_fp16, y = hidden_states_31_cast_fp16)[name = string("obj_29_cast_fp16")]; + string query_19_pad_type_0 = const()[name = string("query_19_pad_type_0"), val = string("valid")]; + tensor query_19_strides_0 = const()[name = string("query_19_strides_0"), val = tensor([1, 1])]; + tensor query_19_pad_0 = const()[name = string("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_19_dilations_0 = const()[name = string("query_19_dilations_0"), val = tensor([1, 1])]; + int32 query_19_groups_0 = const()[name = string("query_19_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49330624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51427840))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_19_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_19_dilations_0, groups = query_19_groups_0, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = query_19_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("query_19_cast_fp16")]; + string current_key_13_pad_type_0 = const()[name = string("current_key_13_pad_type_0"), val = string("valid")]; + tensor current_key_13_strides_0 = const()[name = string("current_key_13_strides_0"), val = tensor([1, 1])]; + tensor current_key_13_pad_0 = const()[name = string("current_key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_13_dilations_0 = const()[name = string("current_key_13_dilations_0"), val = tensor([1, 1])]; + int32 current_key_13_groups_0 = const()[name = string("current_key_13_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51428416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52477056))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_13_cast_fp16 = conv(dilations = current_key_13_dilations_0, groups = current_key_13_groups_0, pad = current_key_13_pad_0, pad_type = current_key_13_pad_type_0, strides = current_key_13_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("current_key_13_cast_fp16")]; + string current_value_7_pad_type_0 = const()[name = string("current_value_7_pad_type_0"), val = string("valid")]; + tensor current_value_7_strides_0 = const()[name = string("current_value_7_strides_0"), val = tensor([1, 1])]; + tensor current_value_7_pad_0 = const()[name = string("current_value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_7_dilations_0 = const()[name = string("current_value_7_dilations_0"), val = tensor([1, 1])]; + int32 current_value_7_groups_0 = const()[name = string("current_value_7_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52477632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53526272))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_7_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_7_dilations_0, groups = current_value_7_groups_0, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = current_value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("current_value_7_cast_fp16")]; + tensor var_1341 = const()[name = string("op_1341"), val = tensor([16, 128, 1, 1])]; + tensor inputs_27_cast_fp16 = reshape(shape = var_1341, x = query_19_cast_fp16)[name = string("inputs_27_cast_fp16")]; + tensor inputs_sq_27_cast_fp16 = mul(x = inputs_27_cast_fp16, y = inputs_27_cast_fp16)[name = string("inputs_sq_27_cast_fp16")]; + tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([1])]; + bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = inputs_sq_27_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1347_to_fp16 = const()[name = string("op_1347_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1348_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1347_to_fp16)[name = string("op_1348_cast_fp16")]; + fp32 var_1349_epsilon_0 = const()[name = string("op_1349_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1349_cast_fp16 = rsqrt(epsilon = var_1349_epsilon_0, x = var_1348_cast_fp16)[name = string("op_1349_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = inputs_27_cast_fp16, y = var_1349_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor w_27_to_fp16 = const()[name = string("w_27_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53526848)))]; + tensor query_normed_7_cast_fp16 = mul(x = w_27_to_fp16, y = hidden_states_33_cast_fp16)[name = string("query_normed_7_cast_fp16")]; + tensor var_1357 = const()[name = string("op_1357"), val = tensor([8, 128, 1, 1])]; + tensor inputs_29_cast_fp16 = reshape(shape = var_1357, x = current_key_13_cast_fp16)[name = string("inputs_29_cast_fp16")]; + tensor inputs_sq_29_cast_fp16 = mul(x = inputs_29_cast_fp16, y = inputs_29_cast_fp16)[name = string("inputs_sq_29_cast_fp16")]; + tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([1])]; + bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = inputs_sq_29_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1364_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1363_to_fp16)[name = string("op_1364_cast_fp16")]; + fp32 var_1365_epsilon_0 = const()[name = string("op_1365_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1365_cast_fp16 = rsqrt(epsilon = var_1365_epsilon_0, x = var_1364_cast_fp16)[name = string("op_1365_cast_fp16")]; + tensor hidden_states_35_cast_fp16 = mul(x = inputs_29_cast_fp16, y = var_1365_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor w_29_to_fp16 = const()[name = string("w_29_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53527168)))]; + tensor current_key_normed_7_cast_fp16 = mul(x = w_29_to_fp16, y = hidden_states_35_cast_fp16)[name = string("current_key_normed_7_cast_fp16")]; + tensor var_1383 = const()[name = string("op_1383"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1383, x = query_normed_7_cast_fp16)[name = string("mh_q_19_cast_fp16")]; + tensor var_1385 = const()[name = string("op_1385"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_13_cast_fp16 = reshape(shape = var_1385, x = current_key_normed_7_cast_fp16)[name = string("mh_k_13_cast_fp16")]; + tensor var_1389_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1389_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = string("op_1394_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1394_end_0 = const()[name = string("op_1394_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1394_end_mask_0 = const()[name = string("op_1394_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = mh_q_19_cast_fp16)[name = string("op_1394_cast_fp16")]; + tensor var_1400_begin_0 = const()[name = string("op_1400_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1400_end_0 = const()[name = string("op_1400_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1400_end_mask_0 = const()[name = string("op_1400_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1400_cast_fp16 = slice_by_index(begin = var_1400_begin_0, end = var_1400_end_0, end_mask = var_1400_end_mask_0, x = mh_q_19_cast_fp16)[name = string("op_1400_cast_fp16")]; + fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1402_cast_fp16 = mul(x = var_1400_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_1402_cast_fp16")]; + bool var_1404_interleave_0 = const()[name = string("op_1404_interleave_0"), val = bool(false)]; + tensor var_1404_cast_fp16 = concat(axis = var_1282, interleave = var_1404_interleave_0, values = (var_1402_cast_fp16, var_1394_cast_fp16))[name = string("op_1404_cast_fp16")]; + tensor var_1405_cast_fp16 = mul(x = var_1404_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1405_cast_fp16")]; + tensor mh_q_21_cast_fp16 = add(x = var_1389_cast_fp16, y = var_1405_cast_fp16)[name = string("mh_q_21_cast_fp16")]; + tensor var_1407_cast_fp16 = mul(x = mh_k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1407_cast_fp16")]; + tensor var_1412_begin_0 = const()[name = string("op_1412_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1412_end_0 = const()[name = string("op_1412_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1412_end_mask_0 = const()[name = string("op_1412_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1412_cast_fp16 = slice_by_index(begin = var_1412_begin_0, end = var_1412_end_0, end_mask = var_1412_end_mask_0, x = mh_k_13_cast_fp16)[name = string("op_1412_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = string("op_1418_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1418_end_0 = const()[name = string("op_1418_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1418_end_mask_0 = const()[name = string("op_1418_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = mh_k_13_cast_fp16)[name = string("op_1418_cast_fp16")]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1420_cast_fp16 = mul(x = var_1418_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_1420_cast_fp16")]; + bool var_1422_interleave_0 = const()[name = string("op_1422_interleave_0"), val = bool(false)]; + tensor var_1422_cast_fp16 = concat(axis = var_1282, interleave = var_1422_interleave_0, values = (var_1420_cast_fp16, var_1412_cast_fp16))[name = string("op_1422_cast_fp16")]; + tensor var_1423_cast_fp16 = mul(x = var_1422_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1423_cast_fp16")]; + tensor mh_k_15_cast_fp16 = add(x = var_1407_cast_fp16, y = var_1423_cast_fp16)[name = string("mh_k_15_cast_fp16")]; + tensor var_1427 = const()[name = string("op_1427"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_15_cast_fp16 = reshape(shape = var_1427, x = mh_k_15_cast_fp16)[name = string("current_key_15_cast_fp16")]; + tensor var_1434_cast_fp16 = mul(x = var_96_cast_fp16_3, y = var_272_cast_fp16)[name = string("op_1434_cast_fp16")]; + tensor var_1435_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_270_cast_fp16)[name = string("op_1435_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1434_cast_fp16, y = var_1435_cast_fp16)[name = string("key_21_cast_fp16")]; + tensor var_1438_cast_fp16 = mul(x = var_104_cast_fp16_3, y = var_272_cast_fp16)[name = string("op_1438_cast_fp16")]; + tensor var_1439_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_270_cast_fp16)[name = string("op_1439_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1438_cast_fp16, y = var_1439_cast_fp16)[name = string("value_13_cast_fp16")]; + tensor var_1443 = const()[name = string("op_1443"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_13_cast_fp16 = reshape(shape = var_1443, x = key_21_cast_fp16)[name = string("key_heads_13_cast_fp16")]; + tensor var_1445 = const()[name = string("op_1445"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_13_cast_fp16 = reshape(shape = var_1445, x = value_13_cast_fp16)[name = string("value_heads_13_cast_fp16")]; + tensor var_1448_begin_0 = const()[name = string("op_1448_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1448_end_0 = const()[name = string("op_1448_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1448_end_mask_0 = const()[name = string("op_1448_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1448_cast_fp16 = slice_by_index(begin = var_1448_begin_0, end = var_1448_end_0, end_mask = var_1448_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1448_cast_fp16")]; + tensor var_1452_begin_0 = const()[name = string("op_1452_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1452_end_0 = const()[name = string("op_1452_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1452_end_mask_0 = const()[name = string("op_1452_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1452_cast_fp16 = slice_by_index(begin = var_1452_begin_0, end = var_1452_end_0, end_mask = var_1452_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1452_cast_fp16")]; + tensor var_1464_begin_0 = const()[name = string("op_1464_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1464_end_0 = const()[name = string("op_1464_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1464_end_mask_0 = const()[name = string("op_1464_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1464_cast_fp16 = slice_by_index(begin = var_1464_begin_0, end = var_1464_end_0, end_mask = var_1464_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1464_cast_fp16")]; + tensor var_1468_begin_0 = const()[name = string("op_1468_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1468_end_0 = const()[name = string("op_1468_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1468_end_mask_0 = const()[name = string("op_1468_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1468_cast_fp16 = slice_by_index(begin = var_1468_begin_0, end = var_1468_end_0, end_mask = var_1468_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1468_cast_fp16")]; + tensor var_1480_begin_0 = const()[name = string("op_1480_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1480_end_0 = const()[name = string("op_1480_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1480_end_mask_0 = const()[name = string("op_1480_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1480_cast_fp16 = slice_by_index(begin = var_1480_begin_0, end = var_1480_end_0, end_mask = var_1480_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1480_cast_fp16")]; + tensor var_1484_begin_0 = const()[name = string("op_1484_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1484_end_0 = const()[name = string("op_1484_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1484_end_mask_0 = const()[name = string("op_1484_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1484_cast_fp16 = slice_by_index(begin = var_1484_begin_0, end = var_1484_end_0, end_mask = var_1484_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1484_cast_fp16")]; + tensor var_1496_begin_0 = const()[name = string("op_1496_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1496_end_0 = const()[name = string("op_1496_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1496_end_mask_0 = const()[name = string("op_1496_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1496_cast_fp16 = slice_by_index(begin = var_1496_begin_0, end = var_1496_end_0, end_mask = var_1496_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1496_cast_fp16")]; + tensor var_1500_begin_0 = const()[name = string("op_1500_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1500_end_0 = const()[name = string("op_1500_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1500_end_mask_0 = const()[name = string("op_1500_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1500_cast_fp16 = slice_by_index(begin = var_1500_begin_0, end = var_1500_end_0, end_mask = var_1500_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1500_cast_fp16")]; + tensor var_1512_begin_0 = const()[name = string("op_1512_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1512_end_0 = const()[name = string("op_1512_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1512_end_mask_0 = const()[name = string("op_1512_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1512_cast_fp16 = slice_by_index(begin = var_1512_begin_0, end = var_1512_end_0, end_mask = var_1512_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1512_cast_fp16")]; + tensor var_1516_begin_0 = const()[name = string("op_1516_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1516_end_0 = const()[name = string("op_1516_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1516_end_mask_0 = const()[name = string("op_1516_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1516_cast_fp16 = slice_by_index(begin = var_1516_begin_0, end = var_1516_end_0, end_mask = var_1516_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1516_cast_fp16")]; + tensor var_1528_begin_0 = const()[name = string("op_1528_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1528_end_0 = const()[name = string("op_1528_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1528_end_mask_0 = const()[name = string("op_1528_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1528_cast_fp16 = slice_by_index(begin = var_1528_begin_0, end = var_1528_end_0, end_mask = var_1528_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1528_cast_fp16")]; + tensor var_1532_begin_0 = const()[name = string("op_1532_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1532_end_0 = const()[name = string("op_1532_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1532_end_mask_0 = const()[name = string("op_1532_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1532_cast_fp16 = slice_by_index(begin = var_1532_begin_0, end = var_1532_end_0, end_mask = var_1532_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1532_cast_fp16")]; + tensor var_1544_begin_0 = const()[name = string("op_1544_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1544_end_0 = const()[name = string("op_1544_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1544_end_mask_0 = const()[name = string("op_1544_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1544_cast_fp16 = slice_by_index(begin = var_1544_begin_0, end = var_1544_end_0, end_mask = var_1544_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1544_cast_fp16")]; + tensor var_1548_begin_0 = const()[name = string("op_1548_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1548_end_0 = const()[name = string("op_1548_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1548_end_mask_0 = const()[name = string("op_1548_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1548_cast_fp16 = slice_by_index(begin = var_1548_begin_0, end = var_1548_end_0, end_mask = var_1548_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1548_cast_fp16")]; + tensor var_1560_begin_0 = const()[name = string("op_1560_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1560_end_0 = const()[name = string("op_1560_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1560_end_mask_0 = const()[name = string("op_1560_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1560_cast_fp16 = slice_by_index(begin = var_1560_begin_0, end = var_1560_end_0, end_mask = var_1560_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1560_cast_fp16")]; + tensor var_1564_begin_0 = const()[name = string("op_1564_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1564_end_0 = const()[name = string("op_1564_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1564_end_mask_0 = const()[name = string("op_1564_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = var_1564_end_0, end_mask = var_1564_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1564_cast_fp16")]; + bool key_heads_15_interleave_0 = const()[name = string("key_heads_15_interleave_0"), val = bool(false)]; + tensor key_heads_15_cast_fp16 = concat(axis = var_1290, interleave = key_heads_15_interleave_0, values = (var_1448_cast_fp16, var_1448_cast_fp16, var_1464_cast_fp16, var_1464_cast_fp16, var_1480_cast_fp16, var_1480_cast_fp16, var_1496_cast_fp16, var_1496_cast_fp16, var_1512_cast_fp16, var_1512_cast_fp16, var_1528_cast_fp16, var_1528_cast_fp16, var_1544_cast_fp16, var_1544_cast_fp16, var_1560_cast_fp16, var_1560_cast_fp16))[name = string("key_heads_15_cast_fp16")]; + bool value_heads_15_interleave_0 = const()[name = string("value_heads_15_interleave_0"), val = bool(false)]; + tensor value_heads_15_cast_fp16 = concat(axis = var_1290, interleave = value_heads_15_interleave_0, values = (var_1452_cast_fp16, var_1452_cast_fp16, var_1468_cast_fp16, var_1468_cast_fp16, var_1484_cast_fp16, var_1484_cast_fp16, var_1500_cast_fp16, var_1500_cast_fp16, var_1516_cast_fp16, var_1516_cast_fp16, var_1532_cast_fp16, var_1532_cast_fp16, var_1548_cast_fp16, var_1548_cast_fp16, var_1564_cast_fp16, var_1564_cast_fp16))[name = string("value_heads_15_cast_fp16")]; + fp16 var_1587_to_fp16 = const()[name = string("op_1587_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1587_to_fp16)[name = string("op_1588_cast_fp16")]; + bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; + bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1588_cast_fp16, y = key_heads_15_cast_fp16)[name = string("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_436_cast_fp16)[name = string("mh_w_15_cast_fp16")]; + tensor var_1600_cast_fp16 = softmax(axis = var_1272, x = mh_w_15_cast_fp16)[name = string("op_1600_cast_fp16")]; + bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; + bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = value_heads_15_cast_fp16, y = var_1600_cast_fp16)[name = string("attn_7_cast_fp16")]; + tensor var_1605 = const()[name = string("op_1605"), val = tensor([1, -1, 1, 1])]; + tensor input_25_cast_fp16 = reshape(shape = var_1605, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")]; + string obj_35_pad_type_0 = const()[name = string("obj_35_pad_type_0"), val = string("valid")]; + tensor obj_35_strides_0 = const()[name = string("obj_35_strides_0"), val = tensor([1, 1])]; + tensor obj_35_pad_0 = const()[name = string("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_35_dilations_0 = const()[name = string("obj_35_dilations_0"), val = tensor([1, 1])]; + int32 obj_35_groups_0 = const()[name = string("obj_35_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53527488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55624704))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_35_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("obj_35_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_31_cast_fp16")]; + tensor inputs_sq_31_cast_fp16 = mul(x = inputs_31_cast_fp16, y = inputs_31_cast_fp16)[name = string("inputs_sq_31_cast_fp16")]; + tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([1])]; + bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = inputs_sq_31_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1623_to_fp16 = const()[name = string("op_1623_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1624_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1623_to_fp16)[name = string("op_1624_cast_fp16")]; + fp32 var_1625_epsilon_0 = const()[name = string("op_1625_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1625_cast_fp16 = rsqrt(epsilon = var_1625_epsilon_0, x = var_1624_cast_fp16)[name = string("op_1625_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = mul(x = inputs_31_cast_fp16, y = var_1625_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor w_31_to_fp16 = const()[name = string("w_31_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55625280)))]; + tensor input_27_cast_fp16 = mul(x = w_31_to_fp16, y = hidden_states_37_cast_fp16)[name = string("input_27_cast_fp16")]; + string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; + tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; + tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; + int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; + tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55627392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58773184))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_1639_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_1639_cast_fp16")]; + string var_1645_pad_type_0 = const()[name = string("op_1645_pad_type_0"), val = string("valid")]; + tensor var_1645_strides_0 = const()[name = string("op_1645_strides_0"), val = tensor([1, 1])]; + tensor var_1645_pad_0 = const()[name = string("op_1645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1645_dilations_0 = const()[name = string("op_1645_dilations_0"), val = tensor([1, 1])]; + int32 var_1645_groups_0 = const()[name = string("op_1645_groups_0"), val = int32(1)]; + tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58773760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61919552))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_1645_cast_fp16 = conv(dilations = var_1645_dilations_0, groups = var_1645_groups_0, pad = var_1645_pad_0, pad_type = var_1645_pad_type_0, strides = var_1645_strides_0, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_1645_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_1639_cast_fp16, y = var_1645_cast_fp16)[name = string("input_31_cast_fp16")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61920128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65065920))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("inputs_33_cast_fp16")]; + int32 var_1659 = const()[name = string("op_1659"), val = int32(3)]; + int32 var_1669 = const()[name = string("op_1669"), val = int32(-2)]; + int32 var_1677 = const()[name = string("op_1677"), val = int32(1)]; + tensor inputs_sq_33_cast_fp16 = mul(x = inputs_33_cast_fp16, y = inputs_33_cast_fp16)[name = string("inputs_sq_33_cast_fp16")]; + tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([1])]; + bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = inputs_sq_33_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1689_to_fp16 = const()[name = string("op_1689_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1690_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1689_to_fp16)[name = string("op_1690_cast_fp16")]; + fp32 var_1691_epsilon_0 = const()[name = string("op_1691_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1691_cast_fp16 = rsqrt(epsilon = var_1691_epsilon_0, x = var_1690_cast_fp16)[name = string("op_1691_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = mul(x = inputs_33_cast_fp16, y = var_1691_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor w_33_to_fp16 = const()[name = string("w_33_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65066496)))]; + tensor obj_37_cast_fp16 = mul(x = w_33_to_fp16, y = hidden_states_41_cast_fp16)[name = string("obj_37_cast_fp16")]; + string query_25_pad_type_0 = const()[name = string("query_25_pad_type_0"), val = string("valid")]; + tensor query_25_strides_0 = const()[name = string("query_25_strides_0"), val = tensor([1, 1])]; + tensor query_25_pad_0 = const()[name = string("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_25_dilations_0 = const()[name = string("query_25_dilations_0"), val = tensor([1, 1])]; + int32 query_25_groups_0 = const()[name = string("query_25_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65068608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67165824))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_25_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_25_dilations_0, groups = query_25_groups_0, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = query_25_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("query_25_cast_fp16")]; + string current_key_17_pad_type_0 = const()[name = string("current_key_17_pad_type_0"), val = string("valid")]; + tensor current_key_17_strides_0 = const()[name = string("current_key_17_strides_0"), val = tensor([1, 1])]; + tensor current_key_17_pad_0 = const()[name = string("current_key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_17_dilations_0 = const()[name = string("current_key_17_dilations_0"), val = tensor([1, 1])]; + int32 current_key_17_groups_0 = const()[name = string("current_key_17_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67166400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68215040))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_17_cast_fp16 = conv(dilations = current_key_17_dilations_0, groups = current_key_17_groups_0, pad = current_key_17_pad_0, pad_type = current_key_17_pad_type_0, strides = current_key_17_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("current_key_17_cast_fp16")]; + string current_value_pad_type_0 = const()[name = string("current_value_pad_type_0"), val = string("valid")]; + tensor current_value_strides_0 = const()[name = string("current_value_strides_0"), val = tensor([1, 1])]; + tensor current_value_pad_0 = const()[name = string("current_value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_dilations_0 = const()[name = string("current_value_dilations_0"), val = tensor([1, 1])]; + int32 current_value_groups_0 = const()[name = string("current_value_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68215616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69264256))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_dilations_0, groups = current_value_groups_0, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = current_value_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("current_value_cast_fp16")]; + tensor var_1728 = const()[name = string("op_1728"), val = tensor([16, 128, 1, 1])]; + tensor inputs_35_cast_fp16 = reshape(shape = var_1728, x = query_25_cast_fp16)[name = string("inputs_35_cast_fp16")]; + tensor inputs_sq_35_cast_fp16 = mul(x = inputs_35_cast_fp16, y = inputs_35_cast_fp16)[name = string("inputs_sq_35_cast_fp16")]; + tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([1])]; + bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = inputs_sq_35_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1735_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1734_to_fp16)[name = string("op_1735_cast_fp16")]; + fp32 var_1736_epsilon_0 = const()[name = string("op_1736_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1736_cast_fp16 = rsqrt(epsilon = var_1736_epsilon_0, x = var_1735_cast_fp16)[name = string("op_1736_cast_fp16")]; + tensor hidden_states_43_cast_fp16 = mul(x = inputs_35_cast_fp16, y = var_1736_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; + tensor w_35_to_fp16 = const()[name = string("w_35_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69264832)))]; + tensor query_normed_cast_fp16 = mul(x = w_35_to_fp16, y = hidden_states_43_cast_fp16)[name = string("query_normed_cast_fp16")]; + tensor var_1744 = const()[name = string("op_1744"), val = tensor([8, 128, 1, 1])]; + tensor inputs_37_cast_fp16 = reshape(shape = var_1744, x = current_key_17_cast_fp16)[name = string("inputs_37_cast_fp16")]; + tensor inputs_sq_37_cast_fp16 = mul(x = inputs_37_cast_fp16, y = inputs_37_cast_fp16)[name = string("inputs_sq_37_cast_fp16")]; + tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([1])]; + bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = inputs_sq_37_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1751_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1750_to_fp16)[name = string("op_1751_cast_fp16")]; + fp32 var_1752_epsilon_0 = const()[name = string("op_1752_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1752_cast_fp16 = rsqrt(epsilon = var_1752_epsilon_0, x = var_1751_cast_fp16)[name = string("op_1752_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = mul(x = inputs_37_cast_fp16, y = var_1752_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor w_37_to_fp16 = const()[name = string("w_37_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69265152)))]; + tensor current_key_normed_cast_fp16 = mul(x = w_37_to_fp16, y = hidden_states_45_cast_fp16)[name = string("current_key_normed_cast_fp16")]; + tensor var_1770 = const()[name = string("op_1770"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_1770, x = query_normed_cast_fp16)[name = string("mh_q_25_cast_fp16")]; + tensor var_1772 = const()[name = string("op_1772"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_17_cast_fp16 = reshape(shape = var_1772, x = current_key_normed_cast_fp16)[name = string("mh_k_17_cast_fp16")]; + tensor var_1776_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1776_cast_fp16")]; + tensor var_1781_begin_0 = const()[name = string("op_1781_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1781_end_0 = const()[name = string("op_1781_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1781_end_mask_0 = const()[name = string("op_1781_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1781_cast_fp16 = slice_by_index(begin = var_1781_begin_0, end = var_1781_end_0, end_mask = var_1781_end_mask_0, x = mh_q_25_cast_fp16)[name = string("op_1781_cast_fp16")]; + tensor var_1787_begin_0 = const()[name = string("op_1787_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1787_end_0 = const()[name = string("op_1787_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1787_end_mask_0 = const()[name = string("op_1787_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = mh_q_25_cast_fp16)[name = string("op_1787_cast_fp16")]; + fp16 const_109_promoted_to_fp16 = const()[name = string("const_109_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1789_cast_fp16 = mul(x = var_1787_cast_fp16, y = const_109_promoted_to_fp16)[name = string("op_1789_cast_fp16")]; + bool var_1791_interleave_0 = const()[name = string("op_1791_interleave_0"), val = bool(false)]; + tensor var_1791_cast_fp16 = concat(axis = var_1669, interleave = var_1791_interleave_0, values = (var_1789_cast_fp16, var_1781_cast_fp16))[name = string("op_1791_cast_fp16")]; + tensor var_1792_cast_fp16 = mul(x = var_1791_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1792_cast_fp16")]; + tensor mh_q_27_cast_fp16 = add(x = var_1776_cast_fp16, y = var_1792_cast_fp16)[name = string("mh_q_27_cast_fp16")]; + tensor var_1794_cast_fp16 = mul(x = mh_k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1794_cast_fp16")]; + tensor var_1799_begin_0 = const()[name = string("op_1799_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1799_end_0 = const()[name = string("op_1799_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1799_end_mask_0 = const()[name = string("op_1799_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1799_cast_fp16 = slice_by_index(begin = var_1799_begin_0, end = var_1799_end_0, end_mask = var_1799_end_mask_0, x = mh_k_17_cast_fp16)[name = string("op_1799_cast_fp16")]; + tensor var_1805_begin_0 = const()[name = string("op_1805_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1805_end_0 = const()[name = string("op_1805_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1805_end_mask_0 = const()[name = string("op_1805_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1805_cast_fp16 = slice_by_index(begin = var_1805_begin_0, end = var_1805_end_0, end_mask = var_1805_end_mask_0, x = mh_k_17_cast_fp16)[name = string("op_1805_cast_fp16")]; + fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1807_cast_fp16 = mul(x = var_1805_cast_fp16, y = const_112_promoted_to_fp16)[name = string("op_1807_cast_fp16")]; + bool var_1809_interleave_0 = const()[name = string("op_1809_interleave_0"), val = bool(false)]; + tensor var_1809_cast_fp16 = concat(axis = var_1669, interleave = var_1809_interleave_0, values = (var_1807_cast_fp16, var_1799_cast_fp16))[name = string("op_1809_cast_fp16")]; + tensor var_1810_cast_fp16 = mul(x = var_1809_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1810_cast_fp16")]; + tensor mh_k_cast_fp16 = add(x = var_1794_cast_fp16, y = var_1810_cast_fp16)[name = string("mh_k_cast_fp16")]; + tensor var_1814 = const()[name = string("op_1814"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_cast_fp16 = reshape(shape = var_1814, x = mh_k_cast_fp16)[name = string("current_key_cast_fp16")]; + tensor var_1821_cast_fp16 = mul(x = var_96_cast_fp16_4, y = var_272_cast_fp16)[name = string("op_1821_cast_fp16")]; + tensor var_1822_cast_fp16 = mul(x = current_key_cast_fp16, y = var_270_cast_fp16)[name = string("op_1822_cast_fp16")]; + tensor key_27_cast_fp16 = add(x = var_1821_cast_fp16, y = var_1822_cast_fp16)[name = string("key_27_cast_fp16")]; + tensor var_1825_cast_fp16 = mul(x = var_104_cast_fp16_4, y = var_272_cast_fp16)[name = string("op_1825_cast_fp16")]; + tensor var_1826_cast_fp16 = mul(x = current_value_cast_fp16, y = var_270_cast_fp16)[name = string("op_1826_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1825_cast_fp16, y = var_1826_cast_fp16)[name = string("value_17_cast_fp16")]; + tensor var_1830 = const()[name = string("op_1830"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_17_cast_fp16 = reshape(shape = var_1830, x = key_27_cast_fp16)[name = string("key_heads_17_cast_fp16")]; + tensor var_1832 = const()[name = string("op_1832"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_17_cast_fp16 = reshape(shape = var_1832, x = value_17_cast_fp16)[name = string("value_heads_17_cast_fp16")]; + tensor var_1835_begin_0 = const()[name = string("op_1835_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1835_end_0 = const()[name = string("op_1835_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1835_end_mask_0 = const()[name = string("op_1835_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1835_cast_fp16 = slice_by_index(begin = var_1835_begin_0, end = var_1835_end_0, end_mask = var_1835_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1835_cast_fp16")]; + tensor var_1839_begin_0 = const()[name = string("op_1839_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1839_end_0 = const()[name = string("op_1839_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1839_end_mask_0 = const()[name = string("op_1839_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1839_cast_fp16 = slice_by_index(begin = var_1839_begin_0, end = var_1839_end_0, end_mask = var_1839_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1839_cast_fp16")]; + tensor var_1851_begin_0 = const()[name = string("op_1851_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1851_end_0 = const()[name = string("op_1851_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1851_end_mask_0 = const()[name = string("op_1851_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1851_cast_fp16 = slice_by_index(begin = var_1851_begin_0, end = var_1851_end_0, end_mask = var_1851_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1851_cast_fp16")]; + tensor var_1855_begin_0 = const()[name = string("op_1855_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1855_end_0 = const()[name = string("op_1855_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1855_end_mask_0 = const()[name = string("op_1855_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1855_cast_fp16 = slice_by_index(begin = var_1855_begin_0, end = var_1855_end_0, end_mask = var_1855_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1855_cast_fp16")]; + tensor var_1867_begin_0 = const()[name = string("op_1867_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1867_end_0 = const()[name = string("op_1867_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1867_end_mask_0 = const()[name = string("op_1867_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1867_cast_fp16 = slice_by_index(begin = var_1867_begin_0, end = var_1867_end_0, end_mask = var_1867_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1867_cast_fp16")]; + tensor var_1871_begin_0 = const()[name = string("op_1871_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1871_end_0 = const()[name = string("op_1871_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1871_end_mask_0 = const()[name = string("op_1871_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1871_cast_fp16 = slice_by_index(begin = var_1871_begin_0, end = var_1871_end_0, end_mask = var_1871_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1871_cast_fp16")]; + tensor var_1883_begin_0 = const()[name = string("op_1883_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1883_end_0 = const()[name = string("op_1883_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1883_end_mask_0 = const()[name = string("op_1883_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1883_cast_fp16 = slice_by_index(begin = var_1883_begin_0, end = var_1883_end_0, end_mask = var_1883_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1883_cast_fp16")]; + tensor var_1887_begin_0 = const()[name = string("op_1887_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1887_end_0 = const()[name = string("op_1887_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1887_end_mask_0 = const()[name = string("op_1887_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1887_cast_fp16 = slice_by_index(begin = var_1887_begin_0, end = var_1887_end_0, end_mask = var_1887_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1887_cast_fp16")]; + tensor var_1899_begin_0 = const()[name = string("op_1899_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1899_end_0 = const()[name = string("op_1899_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1899_end_mask_0 = const()[name = string("op_1899_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1899_cast_fp16 = slice_by_index(begin = var_1899_begin_0, end = var_1899_end_0, end_mask = var_1899_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1899_cast_fp16")]; + tensor var_1903_begin_0 = const()[name = string("op_1903_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1903_end_0 = const()[name = string("op_1903_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1903_end_mask_0 = const()[name = string("op_1903_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1903_cast_fp16 = slice_by_index(begin = var_1903_begin_0, end = var_1903_end_0, end_mask = var_1903_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1903_cast_fp16")]; + tensor var_1915_begin_0 = const()[name = string("op_1915_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1915_end_0 = const()[name = string("op_1915_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1915_end_mask_0 = const()[name = string("op_1915_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1915_cast_fp16 = slice_by_index(begin = var_1915_begin_0, end = var_1915_end_0, end_mask = var_1915_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1915_cast_fp16")]; + tensor var_1919_begin_0 = const()[name = string("op_1919_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1919_end_0 = const()[name = string("op_1919_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1919_end_mask_0 = const()[name = string("op_1919_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1919_cast_fp16 = slice_by_index(begin = var_1919_begin_0, end = var_1919_end_0, end_mask = var_1919_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1919_cast_fp16")]; + tensor var_1931_begin_0 = const()[name = string("op_1931_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1931_end_0 = const()[name = string("op_1931_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1931_end_mask_0 = const()[name = string("op_1931_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1931_cast_fp16 = slice_by_index(begin = var_1931_begin_0, end = var_1931_end_0, end_mask = var_1931_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1931_cast_fp16")]; + tensor var_1935_begin_0 = const()[name = string("op_1935_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1935_end_0 = const()[name = string("op_1935_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1935_end_mask_0 = const()[name = string("op_1935_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1935_cast_fp16 = slice_by_index(begin = var_1935_begin_0, end = var_1935_end_0, end_mask = var_1935_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1935_cast_fp16")]; + tensor var_1947_begin_0 = const()[name = string("op_1947_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1947_end_0 = const()[name = string("op_1947_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1947_end_mask_0 = const()[name = string("op_1947_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1947_cast_fp16 = slice_by_index(begin = var_1947_begin_0, end = var_1947_end_0, end_mask = var_1947_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1947_cast_fp16")]; + tensor var_1951_begin_0 = const()[name = string("op_1951_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1951_end_0 = const()[name = string("op_1951_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1951_end_mask_0 = const()[name = string("op_1951_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1951_cast_fp16 = slice_by_index(begin = var_1951_begin_0, end = var_1951_end_0, end_mask = var_1951_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1951_cast_fp16")]; + bool key_heads_interleave_0 = const()[name = string("key_heads_interleave_0"), val = bool(false)]; + tensor key_heads_cast_fp16 = concat(axis = var_1677, interleave = key_heads_interleave_0, values = (var_1835_cast_fp16, var_1835_cast_fp16, var_1851_cast_fp16, var_1851_cast_fp16, var_1867_cast_fp16, var_1867_cast_fp16, var_1883_cast_fp16, var_1883_cast_fp16, var_1899_cast_fp16, var_1899_cast_fp16, var_1915_cast_fp16, var_1915_cast_fp16, var_1931_cast_fp16, var_1931_cast_fp16, var_1947_cast_fp16, var_1947_cast_fp16))[name = string("key_heads_cast_fp16")]; + bool value_heads_interleave_0 = const()[name = string("value_heads_interleave_0"), val = bool(false)]; + tensor value_heads_cast_fp16 = concat(axis = var_1677, interleave = value_heads_interleave_0, values = (var_1839_cast_fp16, var_1839_cast_fp16, var_1855_cast_fp16, var_1855_cast_fp16, var_1871_cast_fp16, var_1871_cast_fp16, var_1887_cast_fp16, var_1887_cast_fp16, var_1903_cast_fp16, var_1903_cast_fp16, var_1919_cast_fp16, var_1919_cast_fp16, var_1935_cast_fp16, var_1935_cast_fp16, var_1951_cast_fp16, var_1951_cast_fp16))[name = string("value_heads_cast_fp16")]; + fp16 var_1974_to_fp16 = const()[name = string("op_1974_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1975_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1974_to_fp16)[name = string("op_1975_cast_fp16")]; + bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; + bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1975_cast_fp16, y = key_heads_cast_fp16)[name = string("mh_w_17_cast_fp16")]; + tensor mh_w_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_436_cast_fp16)[name = string("mh_w_cast_fp16")]; + tensor var_1987_cast_fp16 = softmax(axis = var_1659, x = mh_w_cast_fp16)[name = string("op_1987_cast_fp16")]; + bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; + bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = value_heads_cast_fp16, y = var_1987_cast_fp16)[name = string("attn_cast_fp16")]; + tensor var_1992 = const()[name = string("op_1992"), val = tensor([1, -1, 1, 1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1992, x = attn_cast_fp16)[name = string("input_33_cast_fp16")]; + string obj_pad_type_0 = const()[name = string("obj_pad_type_0"), val = string("valid")]; + tensor obj_strides_0 = const()[name = string("obj_strides_0"), val = tensor([1, 1])]; + tensor obj_pad_0 = const()[name = string("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_dilations_0 = const()[name = string("obj_dilations_0"), val = tensor([1, 1])]; + int32 obj_groups_0 = const()[name = string("obj_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69265472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71362688))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("obj_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_cast_fp16)[name = string("inputs_39_cast_fp16")]; + tensor inputs_sq_39_cast_fp16 = mul(x = inputs_39_cast_fp16, y = inputs_39_cast_fp16)[name = string("inputs_sq_39_cast_fp16")]; + tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([1])]; + bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = inputs_sq_39_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_2010_to_fp16 = const()[name = string("op_2010_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2011_cast_fp16 = add(x = variance_39_cast_fp16, y = var_2010_to_fp16)[name = string("op_2011_cast_fp16")]; + fp32 var_2012_epsilon_0 = const()[name = string("op_2012_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2012_cast_fp16 = rsqrt(epsilon = var_2012_epsilon_0, x = var_2011_cast_fp16)[name = string("op_2012_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = mul(x = inputs_39_cast_fp16, y = var_2012_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + tensor w_39_to_fp16 = const()[name = string("w_39_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71363264)))]; + tensor input_35_cast_fp16 = mul(x = w_39_to_fp16, y = hidden_states_47_cast_fp16)[name = string("input_35_cast_fp16")]; + string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; + tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; + tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; + int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; + tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71365376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74511168))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")]; + tensor var_2026_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_2026_cast_fp16")]; + string var_2032_pad_type_0 = const()[name = string("op_2032_pad_type_0"), val = string("valid")]; + tensor var_2032_strides_0 = const()[name = string("op_2032_strides_0"), val = tensor([1, 1])]; + tensor var_2032_pad_0 = const()[name = string("op_2032_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2032_dilations_0 = const()[name = string("op_2032_dilations_0"), val = tensor([1, 1])]; + int32 var_2032_groups_0 = const()[name = string("op_2032_groups_0"), val = int32(1)]; + tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74511744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77657536))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_2032_cast_fp16 = conv(dilations = var_2032_dilations_0, groups = var_2032_groups_0, pad = var_2032_pad_0, pad_type = var_2032_pad_type_0, strides = var_2032_strides_0, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_2032_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_2026_cast_fp16, y = var_2032_cast_fp16)[name = string("input_39_cast_fp16")]; + string hidden_states_49_pad_type_0 = const()[name = string("hidden_states_49_pad_type_0"), val = string("valid")]; + tensor hidden_states_49_strides_0 = const()[name = string("hidden_states_49_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_49_pad_0 = const()[name = string("hidden_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_49_dilations_0 = const()[name = string("hidden_states_49_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_49_groups_0 = const()[name = string("hidden_states_49_groups_0"), val = int32(1)]; + tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77658112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80803904))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_49_cast_fp16 = conv(dilations = hidden_states_49_dilations_0, groups = hidden_states_49_groups_0, pad = hidden_states_49_pad_0, pad_type = hidden_states_49_pad_type_0, strides = hidden_states_49_strides_0, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("inputs_cast_fp16")]; + tensor inputs_sq_cast_fp16 = mul(x = inputs_cast_fp16, y = inputs_cast_fp16)[name = string("inputs_sq_cast_fp16")]; + tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([1])]; + bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = inputs_sq_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_2053_to_fp16 = const()[name = string("op_2053_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2054_cast_fp16 = add(x = variance_cast_fp16, y = var_2053_to_fp16)[name = string("op_2054_cast_fp16")]; + fp32 var_2055_epsilon_0 = const()[name = string("op_2055_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2055_cast_fp16 = rsqrt(epsilon = var_2055_epsilon_0, x = var_2054_cast_fp16)[name = string("op_2055_cast_fp16")]; + tensor hidden_states_cast_fp16 = mul(x = inputs_cast_fp16, y = var_2055_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor w_to_fp16 = const()[name = string("w_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80804480)))]; + tensor input_cast_fp16 = mul(x = w_to_fp16, y = hidden_states_cast_fp16)[name = string("input_cast_fp16")]; + string logits_1_pad_type_0 = const()[name = string("logits_1_pad_type_0"), val = string("valid")]; + tensor logits_1_strides_0 = const()[name = string("logits_1_strides_0"), val = tensor([1, 1])]; + tensor logits_1_pad_0 = const()[name = string("logits_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_1_dilations_0 = const()[name = string("logits_1_dilations_0"), val = tensor([1, 1])]; + int32 logits_1_groups_0 = const()[name = string("logits_1_groups_0"), val = int32(1)]; + tensor lm_heads_0_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80806592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82903808))))[name = string("lm_heads_0_weight_to_fp16_palettized")]; + tensor logits_1_cast_fp16 = conv(dilations = logits_1_dilations_0, groups = logits_1_groups_0, pad = logits_1_pad_0, pad_type = logits_1_pad_type_0, strides = logits_1_strides_0, weight = lm_heads_0_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_1_cast_fp16")]; + tensor var_2072_axes_0 = const()[name = string("op_2072_axes_0"), val = tensor([3])]; + tensor var_2072_cast_fp16 = squeeze(axes = var_2072_axes_0, x = logits_1_cast_fp16)[name = string("op_2072_cast_fp16")]; + string logits_3_pad_type_0 = const()[name = string("logits_3_pad_type_0"), val = string("valid")]; + tensor logits_3_strides_0 = const()[name = string("logits_3_strides_0"), val = tensor([1, 1])]; + tensor logits_3_pad_0 = const()[name = string("logits_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_3_dilations_0 = const()[name = string("logits_3_dilations_0"), val = tensor([1, 1])]; + int32 logits_3_groups_0 = const()[name = string("logits_3_groups_0"), val = int32(1)]; + tensor lm_heads_1_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82904384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85001600))))[name = string("lm_heads_1_weight_to_fp16_palettized")]; + tensor logits_3_cast_fp16 = conv(dilations = logits_3_dilations_0, groups = logits_3_groups_0, pad = logits_3_pad_0, pad_type = logits_3_pad_type_0, strides = logits_3_strides_0, weight = lm_heads_1_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_3_cast_fp16")]; + tensor var_2088_axes_0 = const()[name = string("op_2088_axes_0"), val = tensor([3])]; + tensor var_2088_cast_fp16 = squeeze(axes = var_2088_axes_0, x = logits_3_cast_fp16)[name = string("op_2088_cast_fp16")]; + string logits_5_pad_type_0 = const()[name = string("logits_5_pad_type_0"), val = string("valid")]; + tensor logits_5_strides_0 = const()[name = string("logits_5_strides_0"), val = tensor([1, 1])]; + tensor logits_5_pad_0 = const()[name = string("logits_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_5_dilations_0 = const()[name = string("logits_5_dilations_0"), val = tensor([1, 1])]; + int32 logits_5_groups_0 = const()[name = string("logits_5_groups_0"), val = int32(1)]; + tensor lm_heads_2_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85002176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87099392))))[name = string("lm_heads_2_weight_to_fp16_palettized")]; + tensor logits_5_cast_fp16 = conv(dilations = logits_5_dilations_0, groups = logits_5_groups_0, pad = logits_5_pad_0, pad_type = logits_5_pad_type_0, strides = logits_5_strides_0, weight = lm_heads_2_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_5_cast_fp16")]; + tensor var_2104_axes_0 = const()[name = string("op_2104_axes_0"), val = tensor([3])]; + tensor var_2104_cast_fp16 = squeeze(axes = var_2104_axes_0, x = logits_5_cast_fp16)[name = string("op_2104_cast_fp16")]; + string logits_7_pad_type_0 = const()[name = string("logits_7_pad_type_0"), val = string("valid")]; + tensor logits_7_strides_0 = const()[name = string("logits_7_strides_0"), val = tensor([1, 1])]; + tensor logits_7_pad_0 = const()[name = string("logits_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_7_dilations_0 = const()[name = string("logits_7_dilations_0"), val = tensor([1, 1])]; + int32 logits_7_groups_0 = const()[name = string("logits_7_groups_0"), val = int32(1)]; + tensor lm_heads_3_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87099968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89197184))))[name = string("lm_heads_3_weight_to_fp16_palettized")]; + tensor logits_7_cast_fp16 = conv(dilations = logits_7_dilations_0, groups = logits_7_groups_0, pad = logits_7_pad_0, pad_type = logits_7_pad_type_0, strides = logits_7_strides_0, weight = lm_heads_3_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_7_cast_fp16")]; + tensor var_2120_axes_0 = const()[name = string("op_2120_axes_0"), val = tensor([3])]; + tensor var_2120_cast_fp16 = squeeze(axes = var_2120_axes_0, x = logits_7_cast_fp16)[name = string("op_2120_cast_fp16")]; + string logits_9_pad_type_0 = const()[name = string("logits_9_pad_type_0"), val = string("valid")]; + tensor logits_9_strides_0 = const()[name = string("logits_9_strides_0"), val = tensor([1, 1])]; + tensor logits_9_pad_0 = const()[name = string("logits_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_9_dilations_0 = const()[name = string("logits_9_dilations_0"), val = tensor([1, 1])]; + int32 logits_9_groups_0 = const()[name = string("logits_9_groups_0"), val = int32(1)]; + tensor lm_heads_4_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89197760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91294976))))[name = string("lm_heads_4_weight_to_fp16_palettized")]; + tensor logits_9_cast_fp16 = conv(dilations = logits_9_dilations_0, groups = logits_9_groups_0, pad = logits_9_pad_0, pad_type = logits_9_pad_type_0, strides = logits_9_strides_0, weight = lm_heads_4_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_9_cast_fp16")]; + tensor var_2136_axes_0 = const()[name = string("op_2136_axes_0"), val = tensor([3])]; + tensor var_2136_cast_fp16 = squeeze(axes = var_2136_axes_0, x = logits_9_cast_fp16)[name = string("op_2136_cast_fp16")]; + string logits_11_pad_type_0 = const()[name = string("logits_11_pad_type_0"), val = string("valid")]; + tensor logits_11_strides_0 = const()[name = string("logits_11_strides_0"), val = tensor([1, 1])]; + tensor logits_11_pad_0 = const()[name = string("logits_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_11_dilations_0 = const()[name = string("logits_11_dilations_0"), val = tensor([1, 1])]; + int32 logits_11_groups_0 = const()[name = string("logits_11_groups_0"), val = int32(1)]; + tensor lm_heads_5_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91295552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93392768))))[name = string("lm_heads_5_weight_to_fp16_palettized")]; + tensor logits_11_cast_fp16 = conv(dilations = logits_11_dilations_0, groups = logits_11_groups_0, pad = logits_11_pad_0, pad_type = logits_11_pad_type_0, strides = logits_11_strides_0, weight = lm_heads_5_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_11_cast_fp16")]; + tensor var_2152_axes_0 = const()[name = string("op_2152_axes_0"), val = tensor([3])]; + tensor var_2152_cast_fp16 = squeeze(axes = var_2152_axes_0, x = logits_11_cast_fp16)[name = string("op_2152_cast_fp16")]; + string logits_13_pad_type_0 = const()[name = string("logits_13_pad_type_0"), val = string("valid")]; + tensor logits_13_strides_0 = const()[name = string("logits_13_strides_0"), val = tensor([1, 1])]; + tensor logits_13_pad_0 = const()[name = string("logits_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_13_dilations_0 = const()[name = string("logits_13_dilations_0"), val = tensor([1, 1])]; + int32 logits_13_groups_0 = const()[name = string("logits_13_groups_0"), val = int32(1)]; + tensor lm_heads_6_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93393344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95490560))))[name = string("lm_heads_6_weight_to_fp16_palettized")]; + tensor logits_13_cast_fp16 = conv(dilations = logits_13_dilations_0, groups = logits_13_groups_0, pad = logits_13_pad_0, pad_type = logits_13_pad_type_0, strides = logits_13_strides_0, weight = lm_heads_6_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_13_cast_fp16")]; + tensor var_2168_axes_0 = const()[name = string("op_2168_axes_0"), val = tensor([3])]; + tensor var_2168_cast_fp16 = squeeze(axes = var_2168_axes_0, x = logits_13_cast_fp16)[name = string("op_2168_cast_fp16")]; + string logits_15_pad_type_0 = const()[name = string("logits_15_pad_type_0"), val = string("valid")]; + tensor logits_15_strides_0 = const()[name = string("logits_15_strides_0"), val = tensor([1, 1])]; + tensor logits_15_pad_0 = const()[name = string("logits_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_15_dilations_0 = const()[name = string("logits_15_dilations_0"), val = tensor([1, 1])]; + int32 logits_15_groups_0 = const()[name = string("logits_15_groups_0"), val = int32(1)]; + tensor lm_heads_7_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95491136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97588352))))[name = string("lm_heads_7_weight_to_fp16_palettized")]; + tensor logits_15_cast_fp16 = conv(dilations = logits_15_dilations_0, groups = logits_15_groups_0, pad = logits_15_pad_0, pad_type = logits_15_pad_type_0, strides = logits_15_strides_0, weight = lm_heads_7_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_15_cast_fp16")]; + tensor var_2184_axes_0 = const()[name = string("op_2184_axes_0"), val = tensor([3])]; + tensor var_2184_cast_fp16 = squeeze(axes = var_2184_axes_0, x = logits_15_cast_fp16)[name = string("op_2184_cast_fp16")]; + string logits_17_pad_type_0 = const()[name = string("logits_17_pad_type_0"), val = string("valid")]; + tensor logits_17_strides_0 = const()[name = string("logits_17_strides_0"), val = tensor([1, 1])]; + tensor logits_17_pad_0 = const()[name = string("logits_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_17_dilations_0 = const()[name = string("logits_17_dilations_0"), val = tensor([1, 1])]; + int32 logits_17_groups_0 = const()[name = string("logits_17_groups_0"), val = int32(1)]; + tensor lm_heads_8_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97588928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99686144))))[name = string("lm_heads_8_weight_to_fp16_palettized")]; + tensor logits_17_cast_fp16 = conv(dilations = logits_17_dilations_0, groups = logits_17_groups_0, pad = logits_17_pad_0, pad_type = logits_17_pad_type_0, strides = logits_17_strides_0, weight = lm_heads_8_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_17_cast_fp16")]; + tensor var_2200_axes_0 = const()[name = string("op_2200_axes_0"), val = tensor([3])]; + tensor var_2200_cast_fp16 = squeeze(axes = var_2200_axes_0, x = logits_17_cast_fp16)[name = string("op_2200_cast_fp16")]; + string logits_19_pad_type_0 = const()[name = string("logits_19_pad_type_0"), val = string("valid")]; + tensor logits_19_strides_0 = const()[name = string("logits_19_strides_0"), val = tensor([1, 1])]; + tensor logits_19_pad_0 = const()[name = string("logits_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_19_dilations_0 = const()[name = string("logits_19_dilations_0"), val = tensor([1, 1])]; + int32 logits_19_groups_0 = const()[name = string("logits_19_groups_0"), val = int32(1)]; + tensor lm_heads_9_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99686720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101783936))))[name = string("lm_heads_9_weight_to_fp16_palettized")]; + tensor logits_19_cast_fp16 = conv(dilations = logits_19_dilations_0, groups = logits_19_groups_0, pad = logits_19_pad_0, pad_type = logits_19_pad_type_0, strides = logits_19_strides_0, weight = lm_heads_9_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_19_cast_fp16")]; + tensor var_2216_axes_0 = const()[name = string("op_2216_axes_0"), val = tensor([3])]; + tensor var_2216_cast_fp16 = squeeze(axes = var_2216_axes_0, x = logits_19_cast_fp16)[name = string("op_2216_cast_fp16")]; + string logits_21_pad_type_0 = const()[name = string("logits_21_pad_type_0"), val = string("valid")]; + tensor logits_21_strides_0 = const()[name = string("logits_21_strides_0"), val = tensor([1, 1])]; + tensor logits_21_pad_0 = const()[name = string("logits_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_21_dilations_0 = const()[name = string("logits_21_dilations_0"), val = tensor([1, 1])]; + int32 logits_21_groups_0 = const()[name = string("logits_21_groups_0"), val = int32(1)]; + tensor lm_heads_10_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101784512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103881728))))[name = string("lm_heads_10_weight_to_fp16_palettized")]; + tensor logits_21_cast_fp16 = conv(dilations = logits_21_dilations_0, groups = logits_21_groups_0, pad = logits_21_pad_0, pad_type = logits_21_pad_type_0, strides = logits_21_strides_0, weight = lm_heads_10_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_21_cast_fp16")]; + tensor var_2232_axes_0 = const()[name = string("op_2232_axes_0"), val = tensor([3])]; + tensor var_2232_cast_fp16 = squeeze(axes = var_2232_axes_0, x = logits_21_cast_fp16)[name = string("op_2232_cast_fp16")]; + string logits_23_pad_type_0 = const()[name = string("logits_23_pad_type_0"), val = string("valid")]; + tensor logits_23_strides_0 = const()[name = string("logits_23_strides_0"), val = tensor([1, 1])]; + tensor logits_23_pad_0 = const()[name = string("logits_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_23_dilations_0 = const()[name = string("logits_23_dilations_0"), val = tensor([1, 1])]; + int32 logits_23_groups_0 = const()[name = string("logits_23_groups_0"), val = int32(1)]; + tensor lm_heads_11_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103882304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105979520))))[name = string("lm_heads_11_weight_to_fp16_palettized")]; + tensor logits_23_cast_fp16 = conv(dilations = logits_23_dilations_0, groups = logits_23_groups_0, pad = logits_23_pad_0, pad_type = logits_23_pad_type_0, strides = logits_23_strides_0, weight = lm_heads_11_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_23_cast_fp16")]; + tensor var_2248_axes_0 = const()[name = string("op_2248_axes_0"), val = tensor([3])]; + tensor var_2248_cast_fp16 = squeeze(axes = var_2248_axes_0, x = logits_23_cast_fp16)[name = string("op_2248_cast_fp16")]; + string logits_25_pad_type_0 = const()[name = string("logits_25_pad_type_0"), val = string("valid")]; + tensor logits_25_strides_0 = const()[name = string("logits_25_strides_0"), val = tensor([1, 1])]; + tensor logits_25_pad_0 = const()[name = string("logits_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_25_dilations_0 = const()[name = string("logits_25_dilations_0"), val = tensor([1, 1])]; + int32 logits_25_groups_0 = const()[name = string("logits_25_groups_0"), val = int32(1)]; + tensor lm_heads_12_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105980096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108077312))))[name = string("lm_heads_12_weight_to_fp16_palettized")]; + tensor logits_25_cast_fp16 = conv(dilations = logits_25_dilations_0, groups = logits_25_groups_0, pad = logits_25_pad_0, pad_type = logits_25_pad_type_0, strides = logits_25_strides_0, weight = lm_heads_12_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_25_cast_fp16")]; + tensor var_2264_axes_0 = const()[name = string("op_2264_axes_0"), val = tensor([3])]; + tensor var_2264_cast_fp16 = squeeze(axes = var_2264_axes_0, x = logits_25_cast_fp16)[name = string("op_2264_cast_fp16")]; + string logits_27_pad_type_0 = const()[name = string("logits_27_pad_type_0"), val = string("valid")]; + tensor logits_27_strides_0 = const()[name = string("logits_27_strides_0"), val = tensor([1, 1])]; + tensor logits_27_pad_0 = const()[name = string("logits_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_27_dilations_0 = const()[name = string("logits_27_dilations_0"), val = tensor([1, 1])]; + int32 logits_27_groups_0 = const()[name = string("logits_27_groups_0"), val = int32(1)]; + tensor lm_heads_13_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108077888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175104))))[name = string("lm_heads_13_weight_to_fp16_palettized")]; + tensor logits_27_cast_fp16 = conv(dilations = logits_27_dilations_0, groups = logits_27_groups_0, pad = logits_27_pad_0, pad_type = logits_27_pad_type_0, strides = logits_27_strides_0, weight = lm_heads_13_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_27_cast_fp16")]; + tensor var_2280_axes_0 = const()[name = string("op_2280_axes_0"), val = tensor([3])]; + tensor var_2280_cast_fp16 = squeeze(axes = var_2280_axes_0, x = logits_27_cast_fp16)[name = string("op_2280_cast_fp16")]; + string logits_29_pad_type_0 = const()[name = string("logits_29_pad_type_0"), val = string("valid")]; + tensor logits_29_strides_0 = const()[name = string("logits_29_strides_0"), val = tensor([1, 1])]; + tensor logits_29_pad_0 = const()[name = string("logits_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_29_dilations_0 = const()[name = string("logits_29_dilations_0"), val = tensor([1, 1])]; + int32 logits_29_groups_0 = const()[name = string("logits_29_groups_0"), val = int32(1)]; + tensor lm_heads_14_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112272896))))[name = string("lm_heads_14_weight_to_fp16_palettized")]; + tensor logits_29_cast_fp16 = conv(dilations = logits_29_dilations_0, groups = logits_29_groups_0, pad = logits_29_pad_0, pad_type = logits_29_pad_type_0, strides = logits_29_strides_0, weight = lm_heads_14_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_29_cast_fp16")]; + tensor var_2296_axes_0 = const()[name = string("op_2296_axes_0"), val = tensor([3])]; + tensor var_2296_cast_fp16 = squeeze(axes = var_2296_axes_0, x = logits_29_cast_fp16)[name = string("op_2296_cast_fp16")]; + bool var_2302_interleave_0 = const()[name = string("op_2302_interleave_0"), val = bool(false)]; + int32 const_119 = const()[name = string("const_119"), val = int32(2)]; + tensor var_2302_cast_fp16 = concat(axis = const_119, interleave = var_2302_interleave_0, values = (var_2072_cast_fp16, var_2088_cast_fp16, var_2104_cast_fp16, var_2120_cast_fp16, var_2136_cast_fp16, var_2152_cast_fp16, var_2168_cast_fp16, var_2184_cast_fp16, var_2200_cast_fp16, var_2216_cast_fp16, var_2232_cast_fp16, var_2248_cast_fp16, var_2264_cast_fp16, var_2280_cast_fp16, var_2296_cast_fp16))[name = string("op_2302_cast_fp16")]; + int32 var_2304 = const()[name = string("op_2304"), val = int32(1)]; + bool var_2305_interleave_0 = const()[name = string("op_2305_interleave_0"), val = bool(false)]; + tensor key_cache_updates = concat(axis = var_2304, interleave = var_2305_interleave_0, values = (current_key_3_cast_fp16, current_key_7_cast_fp16, current_key_11_cast_fp16, current_key_15_cast_fp16, current_key_cast_fp16))[name = string("op_2305_cast_fp16")]; + int32 var_2307 = const()[name = string("op_2307"), val = int32(1)]; + bool var_2308_interleave_0 = const()[name = string("op_2308_interleave_0"), val = bool(false)]; + tensor value_cache_updates = concat(axis = var_2307, interleave = var_2308_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_cast_fp16))[name = string("op_2308_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([0, 2, 1])]; + tensor all_logits = transpose(perm = transpose_0_perm_0, x = var_2302_cast_fp16)[name = string("transpose_0")]; + } -> (all_logits, key_cache_updates, value_cache_updates); +} \ No newline at end of file