diff --git "a/qwen3_tts/multi_code_decoder/12hz-0.6b-customvoice/W8A16/MultiCodeDecoder.mlmodelc/model.mil" "b/qwen3_tts/multi_code_decoder/12hz-0.6b-customvoice/W8A16/MultiCodeDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/qwen3_tts/multi_code_decoder/12hz-0.6b-customvoice/W8A16/MultiCodeDecoder.mlmodelc/model.mil" @@ -0,0 +1,1369 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { + int32 pos_cos_batch_dims_0 = const()[name = string("pos_cos_batch_dims_0"), val = int32(0)]; + bool pos_cos_validate_indices_0 = const()[name = string("pos_cos_validate_indices_0"), val = bool(false)]; + tensor position_embeddings_cos_weight_to_fp16 = const()[name = string("position_embeddings_cos_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + string cache_length_to_int16_dtype_0 = const()[name = string("cache_length_to_int16_dtype_0"), val = string("int16")]; + string cast_111_dtype_0 = const()[name = string("cast_111_dtype_0"), val = string("int32")]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor cache_length_to_int16 = cast(dtype = cache_length_to_int16_dtype_0, x = cache_length)[name = string("cast_5")]; + tensor cast_111 = cast(dtype = cast_111_dtype_0, x = cache_length_to_int16)[name = string("cast_4")]; + tensor greater_equal_0 = greater_equal(x = cast_111, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(16)]; + tensor add_0 = add(x = cast_111, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = cast_111, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; + string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; + int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; + tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; + tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; + tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; + int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(16)]; + tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; + tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; + int32 pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(0)]; + tensor pos_cos_cast_fp16_cast_uint16_cast_uint16 = gather(axis = pos_cos_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = pos_cos_batch_dims_0, indices = select_0_1, validate_indices = pos_cos_validate_indices_0, x = position_embeddings_cos_weight_to_fp16)[name = string("pos_cos_cast_fp16_cast_uint16_cast_uint16")]; + tensor obj_7_axes_0 = const()[name = string("obj_7_axes_0"), val = tensor([2])]; + tensor obj_7_cast_fp16 = expand_dims(axes = obj_7_axes_0, x = pos_cos_cast_fp16_cast_uint16_cast_uint16)[name = string("obj_7_cast_fp16")]; + int32 pos_sin_axis_0 = const()[name = string("pos_sin_axis_0"), val = int32(0)]; + int32 pos_sin_batch_dims_0 = const()[name = string("pos_sin_batch_dims_0"), val = int32(0)]; + bool pos_sin_validate_indices_0 = const()[name = string("pos_sin_validate_indices_0"), val = bool(false)]; + tensor position_embeddings_sin_weight_to_fp16 = const()[name = string("position_embeddings_sin_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4224)))]; + string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")]; + tensor cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_1")]; + tensor pos_sin_cast_fp16_cast_uint16 = gather(axis = pos_sin_axis_0, batch_dims = pos_sin_batch_dims_0, indices = cache_length_to_uint16, validate_indices = pos_sin_validate_indices_0, x = position_embeddings_sin_weight_to_fp16)[name = string("pos_sin_cast_fp16_cast_uint16")]; + tensor obj_9_axes_0 = const()[name = string("obj_9_axes_0"), val = tensor([2])]; + tensor obj_9_cast_fp16 = expand_dims(axes = obj_9_axes_0, x = pos_sin_cast_fp16_cast_uint16)[name = string("obj_9_cast_fp16")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor([1024, 1024, 1024, 1024, 1024])]; + int32 var_84_axis_0 = const()[name = string("op_84_axis_0"), val = int32(1)]; + tensor var_84_cast_fp16_0, tensor var_84_cast_fp16_1, tensor var_84_cast_fp16_2, tensor var_84_cast_fp16_3, tensor var_84_cast_fp16_4 = split(axis = var_84_axis_0, split_sizes = tile_0, x = key_cache)[name = string("op_84_cast_fp16")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor([1024, 1024, 1024, 1024, 1024])]; + int32 var_92_axis_0 = const()[name = string("op_92_axis_0"), val = int32(1)]; + tensor var_92_cast_fp16_0, tensor var_92_cast_fp16_1, tensor var_92_cast_fp16_2, tensor var_92_cast_fp16_3, tensor var_92_cast_fp16_4 = split(axis = var_92_axis_0, split_sizes = tile_1, x = value_cache)[name = string("op_92_cast_fp16")]; + int32 var_99 = const()[name = string("op_99"), val = int32(3)]; + int32 var_109 = const()[name = string("op_109"), val = int32(-2)]; + int32 var_117 = const()[name = string("op_117"), val = int32(1)]; + tensor inputs_sq_1_cast_fp16 = mul(x = input_embeds, y = input_embeds)[name = string("inputs_sq_1_cast_fp16")]; + tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([1])]; + bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; + tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = inputs_sq_1_cast_fp16)[name = string("variance_1_cast_fp16")]; + fp16 var_129_to_fp16 = const()[name = string("op_129_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_130_cast_fp16 = add(x = variance_1_cast_fp16, y = var_129_to_fp16)[name = string("op_130_cast_fp16")]; + fp32 var_131_epsilon_0 = const()[name = string("op_131_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_131_cast_fp16 = rsqrt(epsilon = var_131_epsilon_0, x = var_130_cast_fp16)[name = string("op_131_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = mul(x = input_embeds, y = var_131_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; + tensor w_1_to_fp16 = const()[name = string("w_1_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8384)))]; + tensor obj_1_cast_fp16 = mul(x = w_1_to_fp16, y = hidden_states_1_cast_fp16)[name = string("obj_1_cast_fp16")]; + string query_1_pad_type_0 = const()[name = string("query_1_pad_type_0"), val = string("valid")]; + tensor query_1_strides_0 = const()[name = string("query_1_strides_0"), val = tensor([1, 1])]; + tensor query_1_pad_0 = const()[name = string("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_1_dilations_0 = const()[name = string("query_1_dilations_0"), val = tensor([1, 1])]; + int32 query_1_groups_0 = const()[name = string("query_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2107712))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2108288)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("query_1_cast_fp16")]; + string current_key_1_pad_type_0 = const()[name = string("current_key_1_pad_type_0"), val = string("valid")]; + tensor current_key_1_strides_0 = const()[name = string("current_key_1_strides_0"), val = tensor([1, 1])]; + tensor current_key_1_pad_0 = const()[name = string("current_key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_1_dilations_0 = const()[name = string("current_key_1_dilations_0"), val = tensor([1, 1])]; + int32 current_key_1_groups_0 = const()[name = string("current_key_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2112448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3161088))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_1_cast_fp16 = conv(dilations = current_key_1_dilations_0, groups = current_key_1_groups_0, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = current_key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("current_key_1_cast_fp16")]; + string current_value_1_pad_type_0 = const()[name = string("current_value_1_pad_type_0"), val = string("valid")]; + tensor current_value_1_strides_0 = const()[name = string("current_value_1_strides_0"), val = tensor([1, 1])]; + tensor current_value_1_pad_0 = const()[name = string("current_value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_1_dilations_0 = const()[name = string("current_value_1_dilations_0"), val = tensor([1, 1])]; + int32 current_value_1_groups_0 = const()[name = string("current_value_1_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3161664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4210304))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4210880)))]; + tensor current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_1_dilations_0, groups = current_value_1_groups_0, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = current_value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("current_value_1_cast_fp16")]; + tensor var_168 = const()[name = string("op_168"), val = tensor([16, 128, 1, 1])]; + tensor inputs_1_cast_fp16 = reshape(shape = var_168, x = query_1_cast_fp16)[name = string("inputs_1_cast_fp16")]; + tensor inputs_sq_3_cast_fp16 = mul(x = inputs_1_cast_fp16, y = inputs_1_cast_fp16)[name = string("inputs_sq_3_cast_fp16")]; + tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([1])]; + bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = inputs_sq_3_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_174_to_fp16 = const()[name = string("op_174_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_175_cast_fp16 = add(x = variance_3_cast_fp16, y = var_174_to_fp16)[name = string("op_175_cast_fp16")]; + fp32 var_176_epsilon_0 = const()[name = string("op_176_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_176_cast_fp16 = rsqrt(epsilon = var_176_epsilon_0, x = var_175_cast_fp16)[name = string("op_176_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_1_cast_fp16, y = var_176_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor w_3_to_fp16 = const()[name = string("w_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4212992)))]; + tensor query_normed_1_cast_fp16 = mul(x = w_3_to_fp16, y = hidden_states_3_cast_fp16)[name = string("query_normed_1_cast_fp16")]; + tensor var_184 = const()[name = string("op_184"), val = tensor([8, 128, 1, 1])]; + tensor inputs_3_cast_fp16 = reshape(shape = var_184, x = current_key_1_cast_fp16)[name = string("inputs_3_cast_fp16")]; + tensor inputs_sq_5_cast_fp16 = mul(x = inputs_3_cast_fp16, y = inputs_3_cast_fp16)[name = string("inputs_sq_5_cast_fp16")]; + tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([1])]; + bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = inputs_sq_5_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_191_cast_fp16 = add(x = variance_5_cast_fp16, y = var_190_to_fp16)[name = string("op_191_cast_fp16")]; + fp32 var_192_epsilon_0 = const()[name = string("op_192_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_192_cast_fp16 = rsqrt(epsilon = var_192_epsilon_0, x = var_191_cast_fp16)[name = string("op_192_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = mul(x = inputs_3_cast_fp16, y = var_192_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor w_5_to_fp16 = const()[name = string("w_5_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4213312)))]; + tensor current_key_normed_1_cast_fp16 = mul(x = w_5_to_fp16, y = hidden_states_5_cast_fp16)[name = string("current_key_normed_1_cast_fp16")]; + tensor var_210 = const()[name = string("op_210"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_210, x = query_normed_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; + tensor var_212 = const()[name = string("op_212"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_1_cast_fp16 = reshape(shape = var_212, x = current_key_normed_1_cast_fp16)[name = string("mh_k_1_cast_fp16")]; + tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([1])]; + tensor cos_1_cast_fp16 = expand_dims(axes = cos_1_axes_0, x = obj_7_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([1])]; + tensor sin_1_cast_fp16 = expand_dims(axes = sin_1_axes_0, x = obj_9_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor var_216_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_216_cast_fp16")]; + tensor var_221_begin_0 = const()[name = string("op_221_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_221_end_0 = const()[name = string("op_221_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_221_end_mask_0 = const()[name = string("op_221_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_221_cast_fp16 = slice_by_index(begin = var_221_begin_0, end = var_221_end_0, end_mask = var_221_end_mask_0, x = mh_q_1_cast_fp16)[name = string("op_221_cast_fp16")]; + tensor var_227_begin_0 = const()[name = string("op_227_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_227_end_0 = const()[name = string("op_227_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_227_end_mask_0 = const()[name = string("op_227_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = mh_q_1_cast_fp16)[name = string("op_227_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_229_cast_fp16 = mul(x = var_227_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_229_cast_fp16")]; + bool var_231_interleave_0 = const()[name = string("op_231_interleave_0"), val = bool(false)]; + tensor var_231_cast_fp16 = concat(axis = var_109, interleave = var_231_interleave_0, values = (var_229_cast_fp16, var_221_cast_fp16))[name = string("op_231_cast_fp16")]; + tensor var_232_cast_fp16 = mul(x = var_231_cast_fp16, y = sin_1_cast_fp16)[name = string("op_232_cast_fp16")]; + tensor mh_q_3_cast_fp16 = add(x = var_216_cast_fp16, y = var_232_cast_fp16)[name = string("mh_q_3_cast_fp16")]; + tensor var_234_cast_fp16 = mul(x = mh_k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_234_cast_fp16")]; + tensor var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_239_end_0 = const()[name = string("op_239_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = mh_k_1_cast_fp16)[name = string("op_239_cast_fp16")]; + tensor var_245_begin_0 = const()[name = string("op_245_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_245_end_0 = const()[name = string("op_245_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_245_end_mask_0 = const()[name = string("op_245_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_245_cast_fp16 = slice_by_index(begin = var_245_begin_0, end = var_245_end_0, end_mask = var_245_end_mask_0, x = mh_k_1_cast_fp16)[name = string("op_245_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_247_cast_fp16 = mul(x = var_245_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_247_cast_fp16")]; + bool var_249_interleave_0 = const()[name = string("op_249_interleave_0"), val = bool(false)]; + tensor var_249_cast_fp16 = concat(axis = var_109, interleave = var_249_interleave_0, values = (var_247_cast_fp16, var_239_cast_fp16))[name = string("op_249_cast_fp16")]; + tensor var_250_cast_fp16 = mul(x = var_249_cast_fp16, y = sin_1_cast_fp16)[name = string("op_250_cast_fp16")]; + tensor mh_k_3_cast_fp16 = add(x = var_234_cast_fp16, y = var_250_cast_fp16)[name = string("mh_k_3_cast_fp16")]; + tensor var_254 = const()[name = string("op_254"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_3_cast_fp16 = reshape(shape = var_254, x = mh_k_3_cast_fp16)[name = string("current_key_3_cast_fp16")]; + tensor var_257_axes_0 = const()[name = string("op_257_axes_0"), val = tensor([1])]; + tensor var_257_cast_fp16 = expand_dims(axes = var_257_axes_0, x = kv_cache_update_mask)[name = string("op_257_cast_fp16")]; + tensor var_258_axes_0 = const()[name = string("op_258_axes_0"), val = tensor([2])]; + tensor var_258_cast_fp16 = expand_dims(axes = var_258_axes_0, x = var_257_cast_fp16)[name = string("op_258_cast_fp16")]; + fp16 var_110_to_fp16 = const()[name = string("op_110_to_fp16"), val = fp16(0x1p+0)]; + tensor var_260_cast_fp16 = sub(x = var_110_to_fp16, y = var_258_cast_fp16)[name = string("op_260_cast_fp16")]; + tensor var_261_cast_fp16 = mul(x = var_84_cast_fp16_0, y = var_260_cast_fp16)[name = string("op_261_cast_fp16")]; + tensor var_262_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_258_cast_fp16)[name = string("op_262_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_261_cast_fp16, y = var_262_cast_fp16)[name = string("key_3_cast_fp16")]; + tensor var_265_cast_fp16 = mul(x = var_92_cast_fp16_0, y = var_260_cast_fp16)[name = string("op_265_cast_fp16")]; + tensor var_266_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_258_cast_fp16)[name = string("op_266_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_265_cast_fp16, y = var_266_cast_fp16)[name = string("value_1_cast_fp16")]; + tensor var_270 = const()[name = string("op_270"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_1_cast_fp16 = reshape(shape = var_270, x = key_3_cast_fp16)[name = string("key_heads_1_cast_fp16")]; + tensor var_272 = const()[name = string("op_272"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_1_cast_fp16 = reshape(shape = var_272, x = value_1_cast_fp16)[name = string("value_heads_1_cast_fp16")]; + tensor var_275_begin_0 = const()[name = string("op_275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_275_end_0 = const()[name = string("op_275_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_275_end_mask_0 = const()[name = string("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = string("op_279_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_279_end_0 = const()[name = string("op_279_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_279_end_mask_0 = const()[name = string("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_279_cast_fp16")]; + tensor var_291_begin_0 = const()[name = string("op_291_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_291_end_0 = const()[name = string("op_291_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_291_end_mask_0 = const()[name = string("op_291_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_291_cast_fp16 = slice_by_index(begin = var_291_begin_0, end = var_291_end_0, end_mask = var_291_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_291_cast_fp16")]; + tensor var_295_begin_0 = const()[name = string("op_295_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_295_end_0 = const()[name = string("op_295_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_295_end_mask_0 = const()[name = string("op_295_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_295_cast_fp16")]; + tensor var_307_begin_0 = const()[name = string("op_307_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_307_end_0 = const()[name = string("op_307_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_307_end_mask_0 = const()[name = string("op_307_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_307_cast_fp16 = slice_by_index(begin = var_307_begin_0, end = var_307_end_0, end_mask = var_307_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_307_cast_fp16")]; + tensor var_311_begin_0 = const()[name = string("op_311_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_311_end_0 = const()[name = string("op_311_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_311_end_mask_0 = const()[name = string("op_311_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_311_cast_fp16")]; + tensor var_323_begin_0 = const()[name = string("op_323_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_323_end_0 = const()[name = string("op_323_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_323_end_mask_0 = const()[name = string("op_323_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_323_cast_fp16 = slice_by_index(begin = var_323_begin_0, end = var_323_end_0, end_mask = var_323_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_323_cast_fp16")]; + tensor var_327_begin_0 = const()[name = string("op_327_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_327_end_0 = const()[name = string("op_327_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_327_end_mask_0 = const()[name = string("op_327_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_327_cast_fp16 = slice_by_index(begin = var_327_begin_0, end = var_327_end_0, end_mask = var_327_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_327_cast_fp16")]; + tensor var_339_begin_0 = const()[name = string("op_339_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_339_end_0 = const()[name = string("op_339_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_339_end_mask_0 = const()[name = string("op_339_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_339_cast_fp16")]; + tensor var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_343_end_0 = const()[name = string("op_343_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_343_cast_fp16")]; + tensor var_355_begin_0 = const()[name = string("op_355_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_355_end_0 = const()[name = string("op_355_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_355_end_mask_0 = const()[name = string("op_355_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_355_cast_fp16 = slice_by_index(begin = var_355_begin_0, end = var_355_end_0, end_mask = var_355_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_355_cast_fp16")]; + tensor var_359_begin_0 = const()[name = string("op_359_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_359_end_0 = const()[name = string("op_359_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_359_end_mask_0 = const()[name = string("op_359_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_359_cast_fp16 = slice_by_index(begin = var_359_begin_0, end = var_359_end_0, end_mask = var_359_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_359_cast_fp16")]; + tensor var_371_begin_0 = const()[name = string("op_371_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_371_end_0 = const()[name = string("op_371_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_371_end_mask_0 = const()[name = string("op_371_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_371_cast_fp16 = slice_by_index(begin = var_371_begin_0, end = var_371_end_0, end_mask = var_371_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_371_cast_fp16")]; + tensor var_375_begin_0 = const()[name = string("op_375_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_375_end_0 = const()[name = string("op_375_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_375_end_mask_0 = const()[name = string("op_375_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_375_cast_fp16 = slice_by_index(begin = var_375_begin_0, end = var_375_end_0, end_mask = var_375_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_375_cast_fp16")]; + tensor var_387_begin_0 = const()[name = string("op_387_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_387_end_0 = const()[name = string("op_387_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_387_end_mask_0 = const()[name = string("op_387_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_387_cast_fp16 = slice_by_index(begin = var_387_begin_0, end = var_387_end_0, end_mask = var_387_end_mask_0, x = key_heads_1_cast_fp16)[name = string("op_387_cast_fp16")]; + tensor var_391_begin_0 = const()[name = string("op_391_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_391_end_0 = const()[name = string("op_391_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_391_end_mask_0 = const()[name = string("op_391_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_391_cast_fp16 = slice_by_index(begin = var_391_begin_0, end = var_391_end_0, end_mask = var_391_end_mask_0, x = value_heads_1_cast_fp16)[name = string("op_391_cast_fp16")]; + bool key_heads_3_interleave_0 = const()[name = string("key_heads_3_interleave_0"), val = bool(false)]; + tensor key_heads_3_cast_fp16 = concat(axis = var_117, interleave = key_heads_3_interleave_0, values = (var_275_cast_fp16, var_275_cast_fp16, var_291_cast_fp16, var_291_cast_fp16, var_307_cast_fp16, var_307_cast_fp16, var_323_cast_fp16, var_323_cast_fp16, var_339_cast_fp16, var_339_cast_fp16, var_355_cast_fp16, var_355_cast_fp16, var_371_cast_fp16, var_371_cast_fp16, var_387_cast_fp16, var_387_cast_fp16))[name = string("key_heads_3_cast_fp16")]; + bool value_heads_3_interleave_0 = const()[name = string("value_heads_3_interleave_0"), val = bool(false)]; + tensor value_heads_3_cast_fp16 = concat(axis = var_117, interleave = value_heads_3_interleave_0, values = (var_279_cast_fp16, var_279_cast_fp16, var_295_cast_fp16, var_295_cast_fp16, var_311_cast_fp16, var_311_cast_fp16, var_327_cast_fp16, var_327_cast_fp16, var_343_cast_fp16, var_343_cast_fp16, var_359_cast_fp16, var_359_cast_fp16, var_375_cast_fp16, var_375_cast_fp16, var_391_cast_fp16, var_391_cast_fp16))[name = string("value_heads_3_cast_fp16")]; + fp16 var_414_to_fp16 = const()[name = string("op_414_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_415_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_414_to_fp16)[name = string("op_415_cast_fp16")]; + bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; + bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_415_cast_fp16, y = key_heads_3_cast_fp16)[name = string("mh_w_1_cast_fp16")]; + tensor var_423_axes_0 = const()[name = string("op_423_axes_0"), val = tensor([1])]; + tensor var_423_cast_fp16 = expand_dims(axes = var_423_axes_0, x = key_padding_mask)[name = string("op_423_cast_fp16")]; + tensor var_424_axes_0 = const()[name = string("op_424_axes_0"), val = tensor([2])]; + tensor var_424_cast_fp16 = expand_dims(axes = var_424_axes_0, x = var_423_cast_fp16)[name = string("op_424_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_424_cast_fp16)[name = string("mh_w_3_cast_fp16")]; + tensor var_427_cast_fp16 = softmax(axis = var_99, x = mh_w_3_cast_fp16)[name = string("op_427_cast_fp16")]; + bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; + bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = value_heads_3_cast_fp16, y = var_427_cast_fp16)[name = string("attn_1_cast_fp16")]; + tensor var_432 = const()[name = string("op_432"), val = tensor([1, -1, 1, 1])]; + tensor input_1_cast_fp16 = reshape(shape = var_432, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; + string obj_11_pad_type_0 = const()[name = string("obj_11_pad_type_0"), val = string("valid")]; + tensor obj_11_strides_0 = const()[name = string("obj_11_strides_0"), val = tensor([1, 1])]; + tensor obj_11_pad_0 = const()[name = string("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_11_dilations_0 = const()[name = string("obj_11_dilations_0"), val = tensor([1, 1])]; + int32 obj_11_groups_0 = const()[name = string("obj_11_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4213632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6310848))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_11_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = input_embeds, y = obj_11_cast_fp16)[name = string("inputs_5_cast_fp16")]; + tensor inputs_sq_7_cast_fp16 = mul(x = inputs_5_cast_fp16, y = inputs_5_cast_fp16)[name = string("inputs_sq_7_cast_fp16")]; + tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([1])]; + bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = inputs_sq_7_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_451_cast_fp16 = add(x = variance_7_cast_fp16, y = var_450_to_fp16)[name = string("op_451_cast_fp16")]; + fp32 var_452_epsilon_0 = const()[name = string("op_452_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_452_cast_fp16 = rsqrt(epsilon = var_452_epsilon_0, x = var_451_cast_fp16)[name = string("op_452_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = mul(x = inputs_5_cast_fp16, y = var_452_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor w_7_to_fp16 = const()[name = string("w_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6311424)))]; + tensor input_3_cast_fp16 = mul(x = w_7_to_fp16, y = hidden_states_7_cast_fp16)[name = string("input_3_cast_fp16")]; + string input_5_pad_type_0 = const()[name = string("input_5_pad_type_0"), val = string("valid")]; + tensor input_5_strides_0 = const()[name = string("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = string("input_5_dilations_0"), val = tensor([1, 1])]; + int32 input_5_groups_0 = const()[name = string("input_5_groups_0"), val = int32(1)]; + tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6313536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9459328))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_5_cast_fp16 = conv(dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor var_466_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_466_cast_fp16")]; + string var_472_pad_type_0 = const()[name = string("op_472_pad_type_0"), val = string("valid")]; + tensor var_472_strides_0 = const()[name = string("op_472_strides_0"), val = tensor([1, 1])]; + tensor var_472_pad_0 = const()[name = string("op_472_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_472_dilations_0 = const()[name = string("op_472_dilations_0"), val = tensor([1, 1])]; + int32 var_472_groups_0 = const()[name = string("op_472_groups_0"), val = int32(1)]; + tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9459904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12605696))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_472_cast_fp16 = conv(dilations = var_472_dilations_0, groups = var_472_groups_0, pad = var_472_pad_0, pad_type = var_472_pad_type_0, strides = var_472_strides_0, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_472_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_466_cast_fp16, y = var_472_cast_fp16)[name = string("input_7_cast_fp16")]; + string hidden_states_9_pad_type_0 = const()[name = string("hidden_states_9_pad_type_0"), val = string("valid")]; + tensor hidden_states_9_strides_0 = const()[name = string("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = string("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = string("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_9_groups_0 = const()[name = string("hidden_states_9_groups_0"), val = int32(1)]; + tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12606272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752064))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_9_cast_fp16 = conv(dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_7_cast_fp16")]; + int32 var_486 = const()[name = string("op_486"), val = int32(3)]; + int32 var_496 = const()[name = string("op_496"), val = int32(-2)]; + int32 var_504 = const()[name = string("op_504"), val = int32(1)]; + tensor inputs_sq_9_cast_fp16 = mul(x = inputs_7_cast_fp16, y = inputs_7_cast_fp16)[name = string("inputs_sq_9_cast_fp16")]; + tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([1])]; + bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = inputs_sq_9_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_516_to_fp16 = const()[name = string("op_516_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_517_cast_fp16 = add(x = variance_9_cast_fp16, y = var_516_to_fp16)[name = string("op_517_cast_fp16")]; + fp32 var_518_epsilon_0 = const()[name = string("op_518_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_518_cast_fp16 = rsqrt(epsilon = var_518_epsilon_0, x = var_517_cast_fp16)[name = string("op_518_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = mul(x = inputs_7_cast_fp16, y = var_518_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor w_9_to_fp16 = const()[name = string("w_9_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752640)))]; + tensor obj_13_cast_fp16 = mul(x = w_9_to_fp16, y = hidden_states_11_cast_fp16)[name = string("obj_13_cast_fp16")]; + string query_7_pad_type_0 = const()[name = string("query_7_pad_type_0"), val = string("valid")]; + tensor query_7_strides_0 = const()[name = string("query_7_strides_0"), val = tensor([1, 1])]; + tensor query_7_pad_0 = const()[name = string("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_7_dilations_0 = const()[name = string("query_7_dilations_0"), val = tensor([1, 1])]; + int32 query_7_groups_0 = const()[name = string("query_7_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15754752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17851968))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_7_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("query_7_cast_fp16")]; + string current_key_5_pad_type_0 = const()[name = string("current_key_5_pad_type_0"), val = string("valid")]; + tensor current_key_5_strides_0 = const()[name = string("current_key_5_strides_0"), val = tensor([1, 1])]; + tensor current_key_5_pad_0 = const()[name = string("current_key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_5_dilations_0 = const()[name = string("current_key_5_dilations_0"), val = tensor([1, 1])]; + int32 current_key_5_groups_0 = const()[name = string("current_key_5_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17852544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18901184))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_5_cast_fp16 = conv(dilations = current_key_5_dilations_0, groups = current_key_5_groups_0, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = current_key_5_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("current_key_5_cast_fp16")]; + string current_value_3_pad_type_0 = const()[name = string("current_value_3_pad_type_0"), val = string("valid")]; + tensor current_value_3_strides_0 = const()[name = string("current_value_3_strides_0"), val = tensor([1, 1])]; + tensor current_value_3_pad_0 = const()[name = string("current_value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_3_dilations_0 = const()[name = string("current_value_3_dilations_0"), val = tensor([1, 1])]; + int32 current_value_3_groups_0 = const()[name = string("current_value_3_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18901760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19950400))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_3_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_3_dilations_0, groups = current_value_3_groups_0, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = current_value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("current_value_3_cast_fp16")]; + tensor var_555 = const()[name = string("op_555"), val = tensor([16, 128, 1, 1])]; + tensor inputs_9_cast_fp16 = reshape(shape = var_555, x = query_7_cast_fp16)[name = string("inputs_9_cast_fp16")]; + tensor inputs_sq_11_cast_fp16 = mul(x = inputs_9_cast_fp16, y = inputs_9_cast_fp16)[name = string("inputs_sq_11_cast_fp16")]; + tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([1])]; + bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = inputs_sq_11_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_561_to_fp16 = const()[name = string("op_561_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_562_cast_fp16 = add(x = variance_11_cast_fp16, y = var_561_to_fp16)[name = string("op_562_cast_fp16")]; + fp32 var_563_epsilon_0 = const()[name = string("op_563_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_563_cast_fp16 = rsqrt(epsilon = var_563_epsilon_0, x = var_562_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = mul(x = inputs_9_cast_fp16, y = var_563_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor w_11_to_fp16 = const()[name = string("w_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19950976)))]; + tensor query_normed_3_cast_fp16 = mul(x = w_11_to_fp16, y = hidden_states_13_cast_fp16)[name = string("query_normed_3_cast_fp16")]; + tensor var_571 = const()[name = string("op_571"), val = tensor([8, 128, 1, 1])]; + tensor inputs_11_cast_fp16 = reshape(shape = var_571, x = current_key_5_cast_fp16)[name = string("inputs_11_cast_fp16")]; + tensor inputs_sq_13_cast_fp16 = mul(x = inputs_11_cast_fp16, y = inputs_11_cast_fp16)[name = string("inputs_sq_13_cast_fp16")]; + tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([1])]; + bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = inputs_sq_13_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_577_to_fp16 = const()[name = string("op_577_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_578_cast_fp16 = add(x = variance_13_cast_fp16, y = var_577_to_fp16)[name = string("op_578_cast_fp16")]; + fp32 var_579_epsilon_0 = const()[name = string("op_579_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_579_cast_fp16 = rsqrt(epsilon = var_579_epsilon_0, x = var_578_cast_fp16)[name = string("op_579_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = mul(x = inputs_11_cast_fp16, y = var_579_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor w_13_to_fp16 = const()[name = string("w_13_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19951296)))]; + tensor current_key_normed_3_cast_fp16 = mul(x = w_13_to_fp16, y = hidden_states_15_cast_fp16)[name = string("current_key_normed_3_cast_fp16")]; + tensor var_597 = const()[name = string("op_597"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_597, x = query_normed_3_cast_fp16)[name = string("mh_q_7_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_5_cast_fp16 = reshape(shape = var_599, x = current_key_normed_3_cast_fp16)[name = string("mh_k_5_cast_fp16")]; + tensor var_603_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_603_cast_fp16")]; + tensor var_608_begin_0 = const()[name = string("op_608_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_end_0 = const()[name = string("op_608_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_608_end_mask_0 = const()[name = string("op_608_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_608_cast_fp16 = slice_by_index(begin = var_608_begin_0, end = var_608_end_0, end_mask = var_608_end_mask_0, x = mh_q_7_cast_fp16)[name = string("op_608_cast_fp16")]; + tensor var_614_begin_0 = const()[name = string("op_614_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_614_end_0 = const()[name = string("op_614_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_614_end_mask_0 = const()[name = string("op_614_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_614_cast_fp16 = slice_by_index(begin = var_614_begin_0, end = var_614_end_0, end_mask = var_614_end_mask_0, x = mh_q_7_cast_fp16)[name = string("op_614_cast_fp16")]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_616_cast_fp16")]; + bool var_618_interleave_0 = const()[name = string("op_618_interleave_0"), val = bool(false)]; + tensor var_618_cast_fp16 = concat(axis = var_496, interleave = var_618_interleave_0, values = (var_616_cast_fp16, var_608_cast_fp16))[name = string("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = mul(x = var_618_cast_fp16, y = sin_1_cast_fp16)[name = string("op_619_cast_fp16")]; + tensor mh_q_9_cast_fp16 = add(x = var_603_cast_fp16, y = var_619_cast_fp16)[name = string("mh_q_9_cast_fp16")]; + tensor var_621_cast_fp16 = mul(x = mh_k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_621_cast_fp16")]; + tensor var_626_begin_0 = const()[name = string("op_626_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_626_end_0 = const()[name = string("op_626_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_626_end_mask_0 = const()[name = string("op_626_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = var_626_end_0, end_mask = var_626_end_mask_0, x = mh_k_5_cast_fp16)[name = string("op_626_cast_fp16")]; + tensor var_632_begin_0 = const()[name = string("op_632_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_632_end_0 = const()[name = string("op_632_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_632_end_mask_0 = const()[name = string("op_632_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_632_cast_fp16 = slice_by_index(begin = var_632_begin_0, end = var_632_end_0, end_mask = var_632_end_mask_0, x = mh_k_5_cast_fp16)[name = string("op_632_cast_fp16")]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_634_cast_fp16 = mul(x = var_632_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_634_cast_fp16")]; + bool var_636_interleave_0 = const()[name = string("op_636_interleave_0"), val = bool(false)]; + tensor var_636_cast_fp16 = concat(axis = var_496, interleave = var_636_interleave_0, values = (var_634_cast_fp16, var_626_cast_fp16))[name = string("op_636_cast_fp16")]; + tensor var_637_cast_fp16 = mul(x = var_636_cast_fp16, y = sin_1_cast_fp16)[name = string("op_637_cast_fp16")]; + tensor mh_k_7_cast_fp16 = add(x = var_621_cast_fp16, y = var_637_cast_fp16)[name = string("mh_k_7_cast_fp16")]; + tensor var_641 = const()[name = string("op_641"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_7_cast_fp16 = reshape(shape = var_641, x = mh_k_7_cast_fp16)[name = string("current_key_7_cast_fp16")]; + tensor var_648_cast_fp16 = mul(x = var_84_cast_fp16_1, y = var_260_cast_fp16)[name = string("op_648_cast_fp16")]; + tensor var_649_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_258_cast_fp16)[name = string("op_649_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_648_cast_fp16, y = var_649_cast_fp16)[name = string("key_9_cast_fp16")]; + tensor var_652_cast_fp16 = mul(x = var_92_cast_fp16_1, y = var_260_cast_fp16)[name = string("op_652_cast_fp16")]; + tensor var_653_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_258_cast_fp16)[name = string("op_653_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_652_cast_fp16, y = var_653_cast_fp16)[name = string("value_5_cast_fp16")]; + tensor var_657 = const()[name = string("op_657"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_5_cast_fp16 = reshape(shape = var_657, x = key_9_cast_fp16)[name = string("key_heads_5_cast_fp16")]; + tensor var_659 = const()[name = string("op_659"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_5_cast_fp16 = reshape(shape = var_659, x = value_5_cast_fp16)[name = string("value_heads_5_cast_fp16")]; + tensor var_662_begin_0 = const()[name = string("op_662_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_662_end_0 = const()[name = string("op_662_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_662_end_mask_0 = const()[name = string("op_662_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_662_cast_fp16 = slice_by_index(begin = var_662_begin_0, end = var_662_end_0, end_mask = var_662_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_662_cast_fp16")]; + tensor var_666_begin_0 = const()[name = string("op_666_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_666_end_0 = const()[name = string("op_666_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_666_end_mask_0 = const()[name = string("op_666_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_666_cast_fp16 = slice_by_index(begin = var_666_begin_0, end = var_666_end_0, end_mask = var_666_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_666_cast_fp16")]; + tensor var_678_begin_0 = const()[name = string("op_678_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_678_end_0 = const()[name = string("op_678_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_678_end_mask_0 = const()[name = string("op_678_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_678_cast_fp16 = slice_by_index(begin = var_678_begin_0, end = var_678_end_0, end_mask = var_678_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_678_cast_fp16")]; + tensor var_682_begin_0 = const()[name = string("op_682_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_682_end_0 = const()[name = string("op_682_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_682_end_mask_0 = const()[name = string("op_682_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_682_cast_fp16 = slice_by_index(begin = var_682_begin_0, end = var_682_end_0, end_mask = var_682_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_682_cast_fp16")]; + tensor var_694_begin_0 = const()[name = string("op_694_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_694_end_0 = const()[name = string("op_694_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_694_end_mask_0 = const()[name = string("op_694_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_694_cast_fp16 = slice_by_index(begin = var_694_begin_0, end = var_694_end_0, end_mask = var_694_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_694_cast_fp16")]; + tensor var_698_begin_0 = const()[name = string("op_698_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_698_end_0 = const()[name = string("op_698_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_698_end_mask_0 = const()[name = string("op_698_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_698_cast_fp16 = slice_by_index(begin = var_698_begin_0, end = var_698_end_0, end_mask = var_698_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_698_cast_fp16")]; + tensor var_710_begin_0 = const()[name = string("op_710_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_710_end_0 = const()[name = string("op_710_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_710_end_mask_0 = const()[name = string("op_710_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_710_cast_fp16 = slice_by_index(begin = var_710_begin_0, end = var_710_end_0, end_mask = var_710_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_710_cast_fp16")]; + tensor var_714_begin_0 = const()[name = string("op_714_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_714_end_0 = const()[name = string("op_714_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_714_end_mask_0 = const()[name = string("op_714_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_714_cast_fp16 = slice_by_index(begin = var_714_begin_0, end = var_714_end_0, end_mask = var_714_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_714_cast_fp16")]; + tensor var_726_begin_0 = const()[name = string("op_726_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_726_end_0 = const()[name = string("op_726_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_726_end_mask_0 = const()[name = string("op_726_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_726_cast_fp16 = slice_by_index(begin = var_726_begin_0, end = var_726_end_0, end_mask = var_726_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_726_cast_fp16")]; + tensor var_730_begin_0 = const()[name = string("op_730_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_730_end_0 = const()[name = string("op_730_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_730_end_mask_0 = const()[name = string("op_730_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_730_cast_fp16 = slice_by_index(begin = var_730_begin_0, end = var_730_end_0, end_mask = var_730_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_730_cast_fp16")]; + tensor var_742_begin_0 = const()[name = string("op_742_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_742_end_0 = const()[name = string("op_742_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_742_end_mask_0 = const()[name = string("op_742_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_742_cast_fp16 = slice_by_index(begin = var_742_begin_0, end = var_742_end_0, end_mask = var_742_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_742_cast_fp16")]; + tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_746_end_0 = const()[name = string("op_746_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_746_cast_fp16")]; + tensor var_758_begin_0 = const()[name = string("op_758_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_758_end_0 = const()[name = string("op_758_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_758_end_mask_0 = const()[name = string("op_758_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_758_cast_fp16 = slice_by_index(begin = var_758_begin_0, end = var_758_end_0, end_mask = var_758_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_758_cast_fp16")]; + tensor var_762_begin_0 = const()[name = string("op_762_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_762_end_0 = const()[name = string("op_762_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_762_end_mask_0 = const()[name = string("op_762_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_762_cast_fp16 = slice_by_index(begin = var_762_begin_0, end = var_762_end_0, end_mask = var_762_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_762_cast_fp16")]; + tensor var_774_begin_0 = const()[name = string("op_774_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_774_end_0 = const()[name = string("op_774_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_774_end_mask_0 = const()[name = string("op_774_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_774_cast_fp16 = slice_by_index(begin = var_774_begin_0, end = var_774_end_0, end_mask = var_774_end_mask_0, x = key_heads_5_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor var_778_begin_0 = const()[name = string("op_778_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_778_end_0 = const()[name = string("op_778_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_778_end_mask_0 = const()[name = string("op_778_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_778_cast_fp16 = slice_by_index(begin = var_778_begin_0, end = var_778_end_0, end_mask = var_778_end_mask_0, x = value_heads_5_cast_fp16)[name = string("op_778_cast_fp16")]; + bool key_heads_7_interleave_0 = const()[name = string("key_heads_7_interleave_0"), val = bool(false)]; + tensor key_heads_7_cast_fp16 = concat(axis = var_504, interleave = key_heads_7_interleave_0, values = (var_662_cast_fp16, var_662_cast_fp16, var_678_cast_fp16, var_678_cast_fp16, var_694_cast_fp16, var_694_cast_fp16, var_710_cast_fp16, var_710_cast_fp16, var_726_cast_fp16, var_726_cast_fp16, var_742_cast_fp16, var_742_cast_fp16, var_758_cast_fp16, var_758_cast_fp16, var_774_cast_fp16, var_774_cast_fp16))[name = string("key_heads_7_cast_fp16")]; + bool value_heads_7_interleave_0 = const()[name = string("value_heads_7_interleave_0"), val = bool(false)]; + tensor value_heads_7_cast_fp16 = concat(axis = var_504, interleave = value_heads_7_interleave_0, values = (var_666_cast_fp16, var_666_cast_fp16, var_682_cast_fp16, var_682_cast_fp16, var_698_cast_fp16, var_698_cast_fp16, var_714_cast_fp16, var_714_cast_fp16, var_730_cast_fp16, var_730_cast_fp16, var_746_cast_fp16, var_746_cast_fp16, var_762_cast_fp16, var_762_cast_fp16, var_778_cast_fp16, var_778_cast_fp16))[name = string("value_heads_7_cast_fp16")]; + fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_802_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_801_to_fp16)[name = string("op_802_cast_fp16")]; + bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; + bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_802_cast_fp16, y = key_heads_7_cast_fp16)[name = string("mh_w_5_cast_fp16")]; + tensor mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_424_cast_fp16)[name = string("mh_w_7_cast_fp16")]; + tensor var_814_cast_fp16 = softmax(axis = var_486, x = mh_w_7_cast_fp16)[name = string("op_814_cast_fp16")]; + bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; + bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = value_heads_7_cast_fp16, y = var_814_cast_fp16)[name = string("attn_3_cast_fp16")]; + tensor var_819 = const()[name = string("op_819"), val = tensor([1, -1, 1, 1])]; + tensor input_9_cast_fp16 = reshape(shape = var_819, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")]; + string obj_19_pad_type_0 = const()[name = string("obj_19_pad_type_0"), val = string("valid")]; + tensor obj_19_strides_0 = const()[name = string("obj_19_strides_0"), val = tensor([1, 1])]; + tensor obj_19_pad_0 = const()[name = string("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_19_dilations_0 = const()[name = string("obj_19_dilations_0"), val = tensor([1, 1])]; + int32 obj_19_groups_0 = const()[name = string("obj_19_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19951616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22048832))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_19_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("obj_19_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_13_cast_fp16")]; + tensor inputs_sq_15_cast_fp16 = mul(x = inputs_13_cast_fp16, y = inputs_13_cast_fp16)[name = string("inputs_sq_15_cast_fp16")]; + tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([1])]; + bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = inputs_sq_15_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_837_to_fp16 = const()[name = string("op_837_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_838_cast_fp16 = add(x = variance_15_cast_fp16, y = var_837_to_fp16)[name = string("op_838_cast_fp16")]; + fp32 var_839_epsilon_0 = const()[name = string("op_839_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_839_cast_fp16 = rsqrt(epsilon = var_839_epsilon_0, x = var_838_cast_fp16)[name = string("op_839_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = mul(x = inputs_13_cast_fp16, y = var_839_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor w_15_to_fp16 = const()[name = string("w_15_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22049408)))]; + tensor input_11_cast_fp16 = mul(x = w_15_to_fp16, y = hidden_states_17_cast_fp16)[name = string("input_11_cast_fp16")]; + string input_13_pad_type_0 = const()[name = string("input_13_pad_type_0"), val = string("valid")]; + tensor input_13_strides_0 = const()[name = string("input_13_strides_0"), val = tensor([1, 1])]; + tensor input_13_pad_0 = const()[name = string("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_13_dilations_0 = const()[name = string("input_13_dilations_0"), val = tensor([1, 1])]; + int32 input_13_groups_0 = const()[name = string("input_13_groups_0"), val = int32(1)]; + tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22051520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25197312))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_13_cast_fp16 = conv(dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")]; + tensor var_853_cast_fp16 = silu(x = input_13_cast_fp16)[name = string("op_853_cast_fp16")]; + string var_859_pad_type_0 = const()[name = string("op_859_pad_type_0"), val = string("valid")]; + tensor var_859_strides_0 = const()[name = string("op_859_strides_0"), val = tensor([1, 1])]; + tensor var_859_pad_0 = const()[name = string("op_859_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_859_dilations_0 = const()[name = string("op_859_dilations_0"), val = tensor([1, 1])]; + int32 var_859_groups_0 = const()[name = string("op_859_groups_0"), val = int32(1)]; + tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25197888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28343680))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_859_cast_fp16 = conv(dilations = var_859_dilations_0, groups = var_859_groups_0, pad = var_859_pad_0, pad_type = var_859_pad_type_0, strides = var_859_strides_0, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_859_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_853_cast_fp16, y = var_859_cast_fp16)[name = string("input_15_cast_fp16")]; + string hidden_states_19_pad_type_0 = const()[name = string("hidden_states_19_pad_type_0"), val = string("valid")]; + tensor hidden_states_19_strides_0 = const()[name = string("hidden_states_19_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_0 = const()[name = string("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_19_dilations_0 = const()[name = string("hidden_states_19_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_19_groups_0 = const()[name = string("hidden_states_19_groups_0"), val = int32(1)]; + tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28344256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490048))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_19_cast_fp16 = conv(dilations = hidden_states_19_dilations_0, groups = hidden_states_19_groups_0, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = hidden_states_19_strides_0, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_15_cast_fp16")]; + int32 var_873 = const()[name = string("op_873"), val = int32(3)]; + int32 var_883 = const()[name = string("op_883"), val = int32(-2)]; + int32 var_891 = const()[name = string("op_891"), val = int32(1)]; + tensor inputs_sq_17_cast_fp16 = mul(x = inputs_15_cast_fp16, y = inputs_15_cast_fp16)[name = string("inputs_sq_17_cast_fp16")]; + tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([1])]; + bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = inputs_sq_17_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_903_to_fp16 = const()[name = string("op_903_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_904_cast_fp16 = add(x = variance_17_cast_fp16, y = var_903_to_fp16)[name = string("op_904_cast_fp16")]; + fp32 var_905_epsilon_0 = const()[name = string("op_905_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_905_cast_fp16 = rsqrt(epsilon = var_905_epsilon_0, x = var_904_cast_fp16)[name = string("op_905_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = mul(x = inputs_15_cast_fp16, y = var_905_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor w_17_to_fp16 = const()[name = string("w_17_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490624)))]; + tensor obj_21_cast_fp16 = mul(x = w_17_to_fp16, y = hidden_states_21_cast_fp16)[name = string("obj_21_cast_fp16")]; + string query_13_pad_type_0 = const()[name = string("query_13_pad_type_0"), val = string("valid")]; + tensor query_13_strides_0 = const()[name = string("query_13_strides_0"), val = tensor([1, 1])]; + tensor query_13_pad_0 = const()[name = string("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_13_dilations_0 = const()[name = string("query_13_dilations_0"), val = tensor([1, 1])]; + int32 query_13_groups_0 = const()[name = string("query_13_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31492736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33589952))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_13_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("query_13_cast_fp16")]; + string current_key_9_pad_type_0 = const()[name = string("current_key_9_pad_type_0"), val = string("valid")]; + tensor current_key_9_strides_0 = const()[name = string("current_key_9_strides_0"), val = tensor([1, 1])]; + tensor current_key_9_pad_0 = const()[name = string("current_key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_9_dilations_0 = const()[name = string("current_key_9_dilations_0"), val = tensor([1, 1])]; + int32 current_key_9_groups_0 = const()[name = string("current_key_9_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34639168))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_9_cast_fp16 = conv(dilations = current_key_9_dilations_0, groups = current_key_9_groups_0, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = current_key_9_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("current_key_9_cast_fp16")]; + string current_value_5_pad_type_0 = const()[name = string("current_value_5_pad_type_0"), val = string("valid")]; + tensor current_value_5_strides_0 = const()[name = string("current_value_5_strides_0"), val = tensor([1, 1])]; + tensor current_value_5_pad_0 = const()[name = string("current_value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_5_dilations_0 = const()[name = string("current_value_5_dilations_0"), val = tensor([1, 1])]; + int32 current_value_5_groups_0 = const()[name = string("current_value_5_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34639744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35688384))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_5_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_5_dilations_0, groups = current_value_5_groups_0, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = current_value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("current_value_5_cast_fp16")]; + tensor var_942 = const()[name = string("op_942"), val = tensor([16, 128, 1, 1])]; + tensor inputs_17_cast_fp16 = reshape(shape = var_942, x = query_13_cast_fp16)[name = string("inputs_17_cast_fp16")]; + tensor inputs_sq_19_cast_fp16 = mul(x = inputs_17_cast_fp16, y = inputs_17_cast_fp16)[name = string("inputs_sq_19_cast_fp16")]; + tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([1])]; + bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = inputs_sq_19_cast_fp16)[name = string("variance_19_cast_fp16")]; + fp16 var_948_to_fp16 = const()[name = string("op_948_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_949_cast_fp16 = add(x = variance_19_cast_fp16, y = var_948_to_fp16)[name = string("op_949_cast_fp16")]; + fp32 var_950_epsilon_0 = const()[name = string("op_950_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_950_cast_fp16 = rsqrt(epsilon = var_950_epsilon_0, x = var_949_cast_fp16)[name = string("op_950_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = inputs_17_cast_fp16, y = var_950_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor w_19_to_fp16 = const()[name = string("w_19_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35688960)))]; + tensor query_normed_5_cast_fp16 = mul(x = w_19_to_fp16, y = hidden_states_23_cast_fp16)[name = string("query_normed_5_cast_fp16")]; + tensor var_958 = const()[name = string("op_958"), val = tensor([8, 128, 1, 1])]; + tensor inputs_19_cast_fp16 = reshape(shape = var_958, x = current_key_9_cast_fp16)[name = string("inputs_19_cast_fp16")]; + tensor inputs_sq_21_cast_fp16 = mul(x = inputs_19_cast_fp16, y = inputs_19_cast_fp16)[name = string("inputs_sq_21_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = inputs_sq_21_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_964_to_fp16 = const()[name = string("op_964_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_965_cast_fp16 = add(x = variance_21_cast_fp16, y = var_964_to_fp16)[name = string("op_965_cast_fp16")]; + fp32 var_966_epsilon_0 = const()[name = string("op_966_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_966_cast_fp16 = rsqrt(epsilon = var_966_epsilon_0, x = var_965_cast_fp16)[name = string("op_966_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = mul(x = inputs_19_cast_fp16, y = var_966_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; + tensor w_21_to_fp16 = const()[name = string("w_21_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35689280)))]; + tensor current_key_normed_5_cast_fp16 = mul(x = w_21_to_fp16, y = hidden_states_25_cast_fp16)[name = string("current_key_normed_5_cast_fp16")]; + tensor var_984 = const()[name = string("op_984"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_984, x = query_normed_5_cast_fp16)[name = string("mh_q_13_cast_fp16")]; + tensor var_986 = const()[name = string("op_986"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_9_cast_fp16 = reshape(shape = var_986, x = current_key_normed_5_cast_fp16)[name = string("mh_k_9_cast_fp16")]; + tensor var_990_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_990_cast_fp16")]; + tensor var_995_begin_0 = const()[name = string("op_995_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_995_end_0 = const()[name = string("op_995_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_995_end_mask_0 = const()[name = string("op_995_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_995_cast_fp16 = slice_by_index(begin = var_995_begin_0, end = var_995_end_0, end_mask = var_995_end_mask_0, x = mh_q_13_cast_fp16)[name = string("op_995_cast_fp16")]; + tensor var_1001_begin_0 = const()[name = string("op_1001_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1001_end_0 = const()[name = string("op_1001_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1001_end_mask_0 = const()[name = string("op_1001_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1001_cast_fp16 = slice_by_index(begin = var_1001_begin_0, end = var_1001_end_0, end_mask = var_1001_end_mask_0, x = mh_q_13_cast_fp16)[name = string("op_1001_cast_fp16")]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1003_cast_fp16 = mul(x = var_1001_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_1003_cast_fp16")]; + bool var_1005_interleave_0 = const()[name = string("op_1005_interleave_0"), val = bool(false)]; + tensor var_1005_cast_fp16 = concat(axis = var_883, interleave = var_1005_interleave_0, values = (var_1003_cast_fp16, var_995_cast_fp16))[name = string("op_1005_cast_fp16")]; + tensor var_1006_cast_fp16 = mul(x = var_1005_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1006_cast_fp16")]; + tensor mh_q_15_cast_fp16 = add(x = var_990_cast_fp16, y = var_1006_cast_fp16)[name = string("mh_q_15_cast_fp16")]; + tensor var_1008_cast_fp16 = mul(x = mh_k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1008_cast_fp16")]; + tensor var_1013_begin_0 = const()[name = string("op_1013_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1013_end_0 = const()[name = string("op_1013_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1013_end_mask_0 = const()[name = string("op_1013_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1013_cast_fp16 = slice_by_index(begin = var_1013_begin_0, end = var_1013_end_0, end_mask = var_1013_end_mask_0, x = mh_k_9_cast_fp16)[name = string("op_1013_cast_fp16")]; + tensor var_1019_begin_0 = const()[name = string("op_1019_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1019_end_0 = const()[name = string("op_1019_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1019_end_mask_0 = const()[name = string("op_1019_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1019_cast_fp16 = slice_by_index(begin = var_1019_begin_0, end = var_1019_end_0, end_mask = var_1019_end_mask_0, x = mh_k_9_cast_fp16)[name = string("op_1019_cast_fp16")]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1021_cast_fp16 = mul(x = var_1019_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_1021_cast_fp16")]; + bool var_1023_interleave_0 = const()[name = string("op_1023_interleave_0"), val = bool(false)]; + tensor var_1023_cast_fp16 = concat(axis = var_883, interleave = var_1023_interleave_0, values = (var_1021_cast_fp16, var_1013_cast_fp16))[name = string("op_1023_cast_fp16")]; + tensor var_1024_cast_fp16 = mul(x = var_1023_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1024_cast_fp16")]; + tensor mh_k_11_cast_fp16 = add(x = var_1008_cast_fp16, y = var_1024_cast_fp16)[name = string("mh_k_11_cast_fp16")]; + tensor var_1028 = const()[name = string("op_1028"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_11_cast_fp16 = reshape(shape = var_1028, x = mh_k_11_cast_fp16)[name = string("current_key_11_cast_fp16")]; + tensor var_1035_cast_fp16 = mul(x = var_84_cast_fp16_2, y = var_260_cast_fp16)[name = string("op_1035_cast_fp16")]; + tensor var_1036_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_258_cast_fp16)[name = string("op_1036_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1035_cast_fp16, y = var_1036_cast_fp16)[name = string("key_15_cast_fp16")]; + tensor var_1039_cast_fp16 = mul(x = var_92_cast_fp16_2, y = var_260_cast_fp16)[name = string("op_1039_cast_fp16")]; + tensor var_1040_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_258_cast_fp16)[name = string("op_1040_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1039_cast_fp16, y = var_1040_cast_fp16)[name = string("value_9_cast_fp16")]; + tensor var_1044 = const()[name = string("op_1044"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_9_cast_fp16 = reshape(shape = var_1044, x = key_15_cast_fp16)[name = string("key_heads_9_cast_fp16")]; + tensor var_1046 = const()[name = string("op_1046"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_9_cast_fp16 = reshape(shape = var_1046, x = value_9_cast_fp16)[name = string("value_heads_9_cast_fp16")]; + tensor var_1049_begin_0 = const()[name = string("op_1049_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_end_0 = const()[name = string("op_1049_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1049_end_mask_0 = const()[name = string("op_1049_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1049_cast_fp16 = slice_by_index(begin = var_1049_begin_0, end = var_1049_end_0, end_mask = var_1049_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1049_cast_fp16")]; + tensor var_1053_begin_0 = const()[name = string("op_1053_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1053_end_0 = const()[name = string("op_1053_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1053_end_mask_0 = const()[name = string("op_1053_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1053_cast_fp16 = slice_by_index(begin = var_1053_begin_0, end = var_1053_end_0, end_mask = var_1053_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1053_cast_fp16")]; + tensor var_1065_begin_0 = const()[name = string("op_1065_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1065_end_0 = const()[name = string("op_1065_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1065_end_mask_0 = const()[name = string("op_1065_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1065_cast_fp16 = slice_by_index(begin = var_1065_begin_0, end = var_1065_end_0, end_mask = var_1065_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1065_cast_fp16")]; + tensor var_1069_begin_0 = const()[name = string("op_1069_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1069_end_0 = const()[name = string("op_1069_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1069_end_mask_0 = const()[name = string("op_1069_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1069_cast_fp16 = slice_by_index(begin = var_1069_begin_0, end = var_1069_end_0, end_mask = var_1069_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1069_cast_fp16")]; + tensor var_1081_begin_0 = const()[name = string("op_1081_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1081_end_0 = const()[name = string("op_1081_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1081_end_mask_0 = const()[name = string("op_1081_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1081_cast_fp16 = slice_by_index(begin = var_1081_begin_0, end = var_1081_end_0, end_mask = var_1081_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1081_cast_fp16")]; + tensor var_1085_begin_0 = const()[name = string("op_1085_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1085_end_0 = const()[name = string("op_1085_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1085_end_mask_0 = const()[name = string("op_1085_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1085_cast_fp16 = slice_by_index(begin = var_1085_begin_0, end = var_1085_end_0, end_mask = var_1085_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1085_cast_fp16")]; + tensor var_1097_begin_0 = const()[name = string("op_1097_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1097_end_0 = const()[name = string("op_1097_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1097_end_mask_0 = const()[name = string("op_1097_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1097_cast_fp16 = slice_by_index(begin = var_1097_begin_0, end = var_1097_end_0, end_mask = var_1097_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1097_cast_fp16")]; + tensor var_1101_begin_0 = const()[name = string("op_1101_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1101_end_0 = const()[name = string("op_1101_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1101_end_mask_0 = const()[name = string("op_1101_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1101_cast_fp16 = slice_by_index(begin = var_1101_begin_0, end = var_1101_end_0, end_mask = var_1101_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor var_1113_begin_0 = const()[name = string("op_1113_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1113_end_0 = const()[name = string("op_1113_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1113_end_mask_0 = const()[name = string("op_1113_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1113_cast_fp16 = slice_by_index(begin = var_1113_begin_0, end = var_1113_end_0, end_mask = var_1113_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1113_cast_fp16")]; + tensor var_1117_begin_0 = const()[name = string("op_1117_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1117_end_0 = const()[name = string("op_1117_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1117_end_mask_0 = const()[name = string("op_1117_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1117_cast_fp16 = slice_by_index(begin = var_1117_begin_0, end = var_1117_end_0, end_mask = var_1117_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor var_1129_begin_0 = const()[name = string("op_1129_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1129_end_0 = const()[name = string("op_1129_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1129_end_mask_0 = const()[name = string("op_1129_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1129_cast_fp16 = slice_by_index(begin = var_1129_begin_0, end = var_1129_end_0, end_mask = var_1129_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1129_cast_fp16")]; + tensor var_1133_begin_0 = const()[name = string("op_1133_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1133_end_0 = const()[name = string("op_1133_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1133_end_mask_0 = const()[name = string("op_1133_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1133_cast_fp16 = slice_by_index(begin = var_1133_begin_0, end = var_1133_end_0, end_mask = var_1133_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1133_cast_fp16")]; + tensor var_1145_begin_0 = const()[name = string("op_1145_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1145_end_0 = const()[name = string("op_1145_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1145_end_mask_0 = const()[name = string("op_1145_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1145_cast_fp16 = slice_by_index(begin = var_1145_begin_0, end = var_1145_end_0, end_mask = var_1145_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1145_cast_fp16")]; + tensor var_1149_begin_0 = const()[name = string("op_1149_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1149_end_0 = const()[name = string("op_1149_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1149_end_mask_0 = const()[name = string("op_1149_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1149_cast_fp16 = slice_by_index(begin = var_1149_begin_0, end = var_1149_end_0, end_mask = var_1149_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1149_cast_fp16")]; + tensor var_1161_begin_0 = const()[name = string("op_1161_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1161_end_0 = const()[name = string("op_1161_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1161_end_mask_0 = const()[name = string("op_1161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1161_cast_fp16 = slice_by_index(begin = var_1161_begin_0, end = var_1161_end_0, end_mask = var_1161_end_mask_0, x = key_heads_9_cast_fp16)[name = string("op_1161_cast_fp16")]; + tensor var_1165_begin_0 = const()[name = string("op_1165_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1165_end_0 = const()[name = string("op_1165_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1165_end_mask_0 = const()[name = string("op_1165_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1165_cast_fp16 = slice_by_index(begin = var_1165_begin_0, end = var_1165_end_0, end_mask = var_1165_end_mask_0, x = value_heads_9_cast_fp16)[name = string("op_1165_cast_fp16")]; + bool key_heads_11_interleave_0 = const()[name = string("key_heads_11_interleave_0"), val = bool(false)]; + tensor key_heads_11_cast_fp16 = concat(axis = var_891, interleave = key_heads_11_interleave_0, values = (var_1049_cast_fp16, var_1049_cast_fp16, var_1065_cast_fp16, var_1065_cast_fp16, var_1081_cast_fp16, var_1081_cast_fp16, var_1097_cast_fp16, var_1097_cast_fp16, var_1113_cast_fp16, var_1113_cast_fp16, var_1129_cast_fp16, var_1129_cast_fp16, var_1145_cast_fp16, var_1145_cast_fp16, var_1161_cast_fp16, var_1161_cast_fp16))[name = string("key_heads_11_cast_fp16")]; + bool value_heads_11_interleave_0 = const()[name = string("value_heads_11_interleave_0"), val = bool(false)]; + tensor value_heads_11_cast_fp16 = concat(axis = var_891, interleave = value_heads_11_interleave_0, values = (var_1053_cast_fp16, var_1053_cast_fp16, var_1069_cast_fp16, var_1069_cast_fp16, var_1085_cast_fp16, var_1085_cast_fp16, var_1101_cast_fp16, var_1101_cast_fp16, var_1117_cast_fp16, var_1117_cast_fp16, var_1133_cast_fp16, var_1133_cast_fp16, var_1149_cast_fp16, var_1149_cast_fp16, var_1165_cast_fp16, var_1165_cast_fp16))[name = string("value_heads_11_cast_fp16")]; + fp16 var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1189_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1188_to_fp16)[name = string("op_1189_cast_fp16")]; + bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; + bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1189_cast_fp16, y = key_heads_11_cast_fp16)[name = string("mh_w_9_cast_fp16")]; + tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_424_cast_fp16)[name = string("mh_w_11_cast_fp16")]; + tensor var_1201_cast_fp16 = softmax(axis = var_873, x = mh_w_11_cast_fp16)[name = string("op_1201_cast_fp16")]; + bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; + bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = value_heads_11_cast_fp16, y = var_1201_cast_fp16)[name = string("attn_5_cast_fp16")]; + tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, -1, 1, 1])]; + tensor input_17_cast_fp16 = reshape(shape = var_1206, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")]; + string obj_27_pad_type_0 = const()[name = string("obj_27_pad_type_0"), val = string("valid")]; + tensor obj_27_strides_0 = const()[name = string("obj_27_strides_0"), val = tensor([1, 1])]; + tensor obj_27_pad_0 = const()[name = string("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_27_dilations_0 = const()[name = string("obj_27_dilations_0"), val = tensor([1, 1])]; + int32 obj_27_groups_0 = const()[name = string("obj_27_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35689600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37786816))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_27_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_27_dilations_0, groups = obj_27_groups_0, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = obj_27_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("obj_27_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_21_cast_fp16")]; + tensor inputs_sq_23_cast_fp16 = mul(x = inputs_21_cast_fp16, y = inputs_21_cast_fp16)[name = string("inputs_sq_23_cast_fp16")]; + tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([1])]; + bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = inputs_sq_23_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1224_to_fp16 = const()[name = string("op_1224_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1225_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1224_to_fp16)[name = string("op_1225_cast_fp16")]; + fp32 var_1226_epsilon_0 = const()[name = string("op_1226_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1226_cast_fp16 = rsqrt(epsilon = var_1226_epsilon_0, x = var_1225_cast_fp16)[name = string("op_1226_cast_fp16")]; + tensor hidden_states_27_cast_fp16 = mul(x = inputs_21_cast_fp16, y = var_1226_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; + tensor w_23_to_fp16 = const()[name = string("w_23_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37787392)))]; + tensor input_19_cast_fp16 = mul(x = w_23_to_fp16, y = hidden_states_27_cast_fp16)[name = string("input_19_cast_fp16")]; + string input_21_pad_type_0 = const()[name = string("input_21_pad_type_0"), val = string("valid")]; + tensor input_21_strides_0 = const()[name = string("input_21_strides_0"), val = tensor([1, 1])]; + tensor input_21_pad_0 = const()[name = string("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_21_dilations_0 = const()[name = string("input_21_dilations_0"), val = tensor([1, 1])]; + int32 input_21_groups_0 = const()[name = string("input_21_groups_0"), val = int32(1)]; + tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37789504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40935296))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_21_cast_fp16 = conv(dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_1240_cast_fp16 = silu(x = input_21_cast_fp16)[name = string("op_1240_cast_fp16")]; + string var_1246_pad_type_0 = const()[name = string("op_1246_pad_type_0"), val = string("valid")]; + tensor var_1246_strides_0 = const()[name = string("op_1246_strides_0"), val = tensor([1, 1])]; + tensor var_1246_pad_0 = const()[name = string("op_1246_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1246_dilations_0 = const()[name = string("op_1246_dilations_0"), val = tensor([1, 1])]; + int32 var_1246_groups_0 = const()[name = string("op_1246_groups_0"), val = int32(1)]; + tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40935872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44081664))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_1246_cast_fp16 = conv(dilations = var_1246_dilations_0, groups = var_1246_groups_0, pad = var_1246_pad_0, pad_type = var_1246_pad_type_0, strides = var_1246_strides_0, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_1246_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1246_cast_fp16)[name = string("input_23_cast_fp16")]; + string hidden_states_29_pad_type_0 = const()[name = string("hidden_states_29_pad_type_0"), val = string("valid")]; + tensor hidden_states_29_strides_0 = const()[name = string("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = string("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = string("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_29_groups_0 = const()[name = string("hidden_states_29_groups_0"), val = int32(1)]; + tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44082240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228032))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_29_cast_fp16 = conv(dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("inputs_23_cast_fp16")]; + int32 var_1260 = const()[name = string("op_1260"), val = int32(3)]; + int32 var_1270 = const()[name = string("op_1270"), val = int32(-2)]; + int32 var_1278 = const()[name = string("op_1278"), val = int32(1)]; + tensor inputs_sq_25_cast_fp16 = mul(x = inputs_23_cast_fp16, y = inputs_23_cast_fp16)[name = string("inputs_sq_25_cast_fp16")]; + tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([1])]; + bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = inputs_sq_25_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1290_to_fp16 = const()[name = string("op_1290_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1291_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1290_to_fp16)[name = string("op_1291_cast_fp16")]; + fp32 var_1292_epsilon_0 = const()[name = string("op_1292_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1292_cast_fp16 = rsqrt(epsilon = var_1292_epsilon_0, x = var_1291_cast_fp16)[name = string("op_1292_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = mul(x = inputs_23_cast_fp16, y = var_1292_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor w_25_to_fp16 = const()[name = string("w_25_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228608)))]; + tensor obj_29_cast_fp16 = mul(x = w_25_to_fp16, y = hidden_states_31_cast_fp16)[name = string("obj_29_cast_fp16")]; + string query_19_pad_type_0 = const()[name = string("query_19_pad_type_0"), val = string("valid")]; + tensor query_19_strides_0 = const()[name = string("query_19_strides_0"), val = tensor([1, 1])]; + tensor query_19_pad_0 = const()[name = string("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_19_dilations_0 = const()[name = string("query_19_dilations_0"), val = tensor([1, 1])]; + int32 query_19_groups_0 = const()[name = string("query_19_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47230720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49327936))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_19_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_19_dilations_0, groups = query_19_groups_0, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = query_19_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("query_19_cast_fp16")]; + string current_key_13_pad_type_0 = const()[name = string("current_key_13_pad_type_0"), val = string("valid")]; + tensor current_key_13_strides_0 = const()[name = string("current_key_13_strides_0"), val = tensor([1, 1])]; + tensor current_key_13_pad_0 = const()[name = string("current_key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_13_dilations_0 = const()[name = string("current_key_13_dilations_0"), val = tensor([1, 1])]; + int32 current_key_13_groups_0 = const()[name = string("current_key_13_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49328512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50377152))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_13_cast_fp16 = conv(dilations = current_key_13_dilations_0, groups = current_key_13_groups_0, pad = current_key_13_pad_0, pad_type = current_key_13_pad_type_0, strides = current_key_13_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("current_key_13_cast_fp16")]; + string current_value_7_pad_type_0 = const()[name = string("current_value_7_pad_type_0"), val = string("valid")]; + tensor current_value_7_strides_0 = const()[name = string("current_value_7_strides_0"), val = tensor([1, 1])]; + tensor current_value_7_pad_0 = const()[name = string("current_value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_7_dilations_0 = const()[name = string("current_value_7_dilations_0"), val = tensor([1, 1])]; + int32 current_value_7_groups_0 = const()[name = string("current_value_7_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50377728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51426368))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_7_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_7_dilations_0, groups = current_value_7_groups_0, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = current_value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("current_value_7_cast_fp16")]; + tensor var_1329 = const()[name = string("op_1329"), val = tensor([16, 128, 1, 1])]; + tensor inputs_25_cast_fp16 = reshape(shape = var_1329, x = query_19_cast_fp16)[name = string("inputs_25_cast_fp16")]; + tensor inputs_sq_27_cast_fp16 = mul(x = inputs_25_cast_fp16, y = inputs_25_cast_fp16)[name = string("inputs_sq_27_cast_fp16")]; + tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([1])]; + bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = inputs_sq_27_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1336_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1335_to_fp16)[name = string("op_1336_cast_fp16")]; + fp32 var_1337_epsilon_0 = const()[name = string("op_1337_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1337_cast_fp16 = rsqrt(epsilon = var_1337_epsilon_0, x = var_1336_cast_fp16)[name = string("op_1337_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = inputs_25_cast_fp16, y = var_1337_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor w_27_to_fp16 = const()[name = string("w_27_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51426944)))]; + tensor query_normed_7_cast_fp16 = mul(x = w_27_to_fp16, y = hidden_states_33_cast_fp16)[name = string("query_normed_7_cast_fp16")]; + tensor var_1345 = const()[name = string("op_1345"), val = tensor([8, 128, 1, 1])]; + tensor inputs_27_cast_fp16 = reshape(shape = var_1345, x = current_key_13_cast_fp16)[name = string("inputs_27_cast_fp16")]; + tensor inputs_sq_29_cast_fp16 = mul(x = inputs_27_cast_fp16, y = inputs_27_cast_fp16)[name = string("inputs_sq_29_cast_fp16")]; + tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([1])]; + bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = inputs_sq_29_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1352_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1351_to_fp16)[name = string("op_1352_cast_fp16")]; + fp32 var_1353_epsilon_0 = const()[name = string("op_1353_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1353_cast_fp16 = rsqrt(epsilon = var_1353_epsilon_0, x = var_1352_cast_fp16)[name = string("op_1353_cast_fp16")]; + tensor hidden_states_35_cast_fp16 = mul(x = inputs_27_cast_fp16, y = var_1353_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; + tensor w_29_to_fp16 = const()[name = string("w_29_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51427264)))]; + tensor current_key_normed_7_cast_fp16 = mul(x = w_29_to_fp16, y = hidden_states_35_cast_fp16)[name = string("current_key_normed_7_cast_fp16")]; + tensor var_1371 = const()[name = string("op_1371"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1371, x = query_normed_7_cast_fp16)[name = string("mh_q_19_cast_fp16")]; + tensor var_1373 = const()[name = string("op_1373"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_13_cast_fp16 = reshape(shape = var_1373, x = current_key_normed_7_cast_fp16)[name = string("mh_k_13_cast_fp16")]; + tensor var_1377_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1377_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = string("op_1382_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1382_end_0 = const()[name = string("op_1382_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1382_end_mask_0 = const()[name = string("op_1382_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = mh_q_19_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1388_begin_0 = const()[name = string("op_1388_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1388_end_0 = const()[name = string("op_1388_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1388_end_mask_0 = const()[name = string("op_1388_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1388_cast_fp16 = slice_by_index(begin = var_1388_begin_0, end = var_1388_end_0, end_mask = var_1388_end_mask_0, x = mh_q_19_cast_fp16)[name = string("op_1388_cast_fp16")]; + fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1390_cast_fp16 = mul(x = var_1388_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_1390_cast_fp16")]; + bool var_1392_interleave_0 = const()[name = string("op_1392_interleave_0"), val = bool(false)]; + tensor var_1392_cast_fp16 = concat(axis = var_1270, interleave = var_1392_interleave_0, values = (var_1390_cast_fp16, var_1382_cast_fp16))[name = string("op_1392_cast_fp16")]; + tensor var_1393_cast_fp16 = mul(x = var_1392_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1393_cast_fp16")]; + tensor mh_q_21_cast_fp16 = add(x = var_1377_cast_fp16, y = var_1393_cast_fp16)[name = string("mh_q_21_cast_fp16")]; + tensor var_1395_cast_fp16 = mul(x = mh_k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1395_cast_fp16")]; + tensor var_1400_begin_0 = const()[name = string("op_1400_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1400_end_0 = const()[name = string("op_1400_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1400_end_mask_0 = const()[name = string("op_1400_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1400_cast_fp16 = slice_by_index(begin = var_1400_begin_0, end = var_1400_end_0, end_mask = var_1400_end_mask_0, x = mh_k_13_cast_fp16)[name = string("op_1400_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = string("op_1406_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1406_end_0 = const()[name = string("op_1406_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1406_end_mask_0 = const()[name = string("op_1406_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = mh_k_13_cast_fp16)[name = string("op_1406_cast_fp16")]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1408_cast_fp16 = mul(x = var_1406_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_1408_cast_fp16")]; + bool var_1410_interleave_0 = const()[name = string("op_1410_interleave_0"), val = bool(false)]; + tensor var_1410_cast_fp16 = concat(axis = var_1270, interleave = var_1410_interleave_0, values = (var_1408_cast_fp16, var_1400_cast_fp16))[name = string("op_1410_cast_fp16")]; + tensor var_1411_cast_fp16 = mul(x = var_1410_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1411_cast_fp16")]; + tensor mh_k_15_cast_fp16 = add(x = var_1395_cast_fp16, y = var_1411_cast_fp16)[name = string("mh_k_15_cast_fp16")]; + tensor var_1415 = const()[name = string("op_1415"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_15_cast_fp16 = reshape(shape = var_1415, x = mh_k_15_cast_fp16)[name = string("current_key_15_cast_fp16")]; + tensor var_1422_cast_fp16 = mul(x = var_84_cast_fp16_3, y = var_260_cast_fp16)[name = string("op_1422_cast_fp16")]; + tensor var_1423_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_258_cast_fp16)[name = string("op_1423_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1422_cast_fp16, y = var_1423_cast_fp16)[name = string("key_21_cast_fp16")]; + tensor var_1426_cast_fp16 = mul(x = var_92_cast_fp16_3, y = var_260_cast_fp16)[name = string("op_1426_cast_fp16")]; + tensor var_1427_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_258_cast_fp16)[name = string("op_1427_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1426_cast_fp16, y = var_1427_cast_fp16)[name = string("value_13_cast_fp16")]; + tensor var_1431 = const()[name = string("op_1431"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_13_cast_fp16 = reshape(shape = var_1431, x = key_21_cast_fp16)[name = string("key_heads_13_cast_fp16")]; + tensor var_1433 = const()[name = string("op_1433"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_13_cast_fp16 = reshape(shape = var_1433, x = value_13_cast_fp16)[name = string("value_heads_13_cast_fp16")]; + tensor var_1436_begin_0 = const()[name = string("op_1436_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1436_end_0 = const()[name = string("op_1436_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1436_end_mask_0 = const()[name = string("op_1436_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1436_cast_fp16 = slice_by_index(begin = var_1436_begin_0, end = var_1436_end_0, end_mask = var_1436_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1436_cast_fp16")]; + tensor var_1440_begin_0 = const()[name = string("op_1440_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1440_end_0 = const()[name = string("op_1440_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1440_end_mask_0 = const()[name = string("op_1440_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1440_cast_fp16 = slice_by_index(begin = var_1440_begin_0, end = var_1440_end_0, end_mask = var_1440_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1440_cast_fp16")]; + tensor var_1452_begin_0 = const()[name = string("op_1452_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1452_end_0 = const()[name = string("op_1452_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1452_end_mask_0 = const()[name = string("op_1452_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1452_cast_fp16 = slice_by_index(begin = var_1452_begin_0, end = var_1452_end_0, end_mask = var_1452_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1452_cast_fp16")]; + tensor var_1456_begin_0 = const()[name = string("op_1456_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1456_end_0 = const()[name = string("op_1456_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1456_end_mask_0 = const()[name = string("op_1456_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1456_cast_fp16 = slice_by_index(begin = var_1456_begin_0, end = var_1456_end_0, end_mask = var_1456_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1456_cast_fp16")]; + tensor var_1468_begin_0 = const()[name = string("op_1468_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1468_end_0 = const()[name = string("op_1468_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1468_end_mask_0 = const()[name = string("op_1468_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1468_cast_fp16 = slice_by_index(begin = var_1468_begin_0, end = var_1468_end_0, end_mask = var_1468_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1468_cast_fp16")]; + tensor var_1472_begin_0 = const()[name = string("op_1472_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1472_end_0 = const()[name = string("op_1472_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1472_end_mask_0 = const()[name = string("op_1472_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1472_cast_fp16 = slice_by_index(begin = var_1472_begin_0, end = var_1472_end_0, end_mask = var_1472_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1472_cast_fp16")]; + tensor var_1484_begin_0 = const()[name = string("op_1484_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1484_end_0 = const()[name = string("op_1484_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1484_end_mask_0 = const()[name = string("op_1484_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1484_cast_fp16 = slice_by_index(begin = var_1484_begin_0, end = var_1484_end_0, end_mask = var_1484_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1484_cast_fp16")]; + tensor var_1488_begin_0 = const()[name = string("op_1488_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1488_end_0 = const()[name = string("op_1488_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1488_end_mask_0 = const()[name = string("op_1488_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1488_cast_fp16 = slice_by_index(begin = var_1488_begin_0, end = var_1488_end_0, end_mask = var_1488_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1488_cast_fp16")]; + tensor var_1500_begin_0 = const()[name = string("op_1500_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1500_end_0 = const()[name = string("op_1500_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1500_end_mask_0 = const()[name = string("op_1500_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1500_cast_fp16 = slice_by_index(begin = var_1500_begin_0, end = var_1500_end_0, end_mask = var_1500_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1500_cast_fp16")]; + tensor var_1504_begin_0 = const()[name = string("op_1504_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1504_end_0 = const()[name = string("op_1504_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1504_end_mask_0 = const()[name = string("op_1504_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1504_cast_fp16 = slice_by_index(begin = var_1504_begin_0, end = var_1504_end_0, end_mask = var_1504_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1504_cast_fp16")]; + tensor var_1516_begin_0 = const()[name = string("op_1516_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1516_end_0 = const()[name = string("op_1516_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1516_end_mask_0 = const()[name = string("op_1516_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1516_cast_fp16 = slice_by_index(begin = var_1516_begin_0, end = var_1516_end_0, end_mask = var_1516_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1516_cast_fp16")]; + tensor var_1520_begin_0 = const()[name = string("op_1520_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1520_end_0 = const()[name = string("op_1520_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1520_end_mask_0 = const()[name = string("op_1520_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1520_cast_fp16 = slice_by_index(begin = var_1520_begin_0, end = var_1520_end_0, end_mask = var_1520_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1520_cast_fp16")]; + tensor var_1532_begin_0 = const()[name = string("op_1532_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1532_end_0 = const()[name = string("op_1532_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1532_end_mask_0 = const()[name = string("op_1532_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1532_cast_fp16 = slice_by_index(begin = var_1532_begin_0, end = var_1532_end_0, end_mask = var_1532_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1532_cast_fp16")]; + tensor var_1536_begin_0 = const()[name = string("op_1536_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1536_end_0 = const()[name = string("op_1536_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1536_end_mask_0 = const()[name = string("op_1536_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1536_cast_fp16 = slice_by_index(begin = var_1536_begin_0, end = var_1536_end_0, end_mask = var_1536_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1536_cast_fp16")]; + tensor var_1548_begin_0 = const()[name = string("op_1548_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1548_end_0 = const()[name = string("op_1548_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1548_end_mask_0 = const()[name = string("op_1548_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1548_cast_fp16 = slice_by_index(begin = var_1548_begin_0, end = var_1548_end_0, end_mask = var_1548_end_mask_0, x = key_heads_13_cast_fp16)[name = string("op_1548_cast_fp16")]; + tensor var_1552_begin_0 = const()[name = string("op_1552_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1552_end_0 = const()[name = string("op_1552_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1552_end_mask_0 = const()[name = string("op_1552_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1552_cast_fp16 = slice_by_index(begin = var_1552_begin_0, end = var_1552_end_0, end_mask = var_1552_end_mask_0, x = value_heads_13_cast_fp16)[name = string("op_1552_cast_fp16")]; + bool key_heads_15_interleave_0 = const()[name = string("key_heads_15_interleave_0"), val = bool(false)]; + tensor key_heads_15_cast_fp16 = concat(axis = var_1278, interleave = key_heads_15_interleave_0, values = (var_1436_cast_fp16, var_1436_cast_fp16, var_1452_cast_fp16, var_1452_cast_fp16, var_1468_cast_fp16, var_1468_cast_fp16, var_1484_cast_fp16, var_1484_cast_fp16, var_1500_cast_fp16, var_1500_cast_fp16, var_1516_cast_fp16, var_1516_cast_fp16, var_1532_cast_fp16, var_1532_cast_fp16, var_1548_cast_fp16, var_1548_cast_fp16))[name = string("key_heads_15_cast_fp16")]; + bool value_heads_15_interleave_0 = const()[name = string("value_heads_15_interleave_0"), val = bool(false)]; + tensor value_heads_15_cast_fp16 = concat(axis = var_1278, interleave = value_heads_15_interleave_0, values = (var_1440_cast_fp16, var_1440_cast_fp16, var_1456_cast_fp16, var_1456_cast_fp16, var_1472_cast_fp16, var_1472_cast_fp16, var_1488_cast_fp16, var_1488_cast_fp16, var_1504_cast_fp16, var_1504_cast_fp16, var_1520_cast_fp16, var_1520_cast_fp16, var_1536_cast_fp16, var_1536_cast_fp16, var_1552_cast_fp16, var_1552_cast_fp16))[name = string("value_heads_15_cast_fp16")]; + fp16 var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1575_to_fp16)[name = string("op_1576_cast_fp16")]; + bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; + bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1576_cast_fp16, y = key_heads_15_cast_fp16)[name = string("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_424_cast_fp16)[name = string("mh_w_15_cast_fp16")]; + tensor var_1588_cast_fp16 = softmax(axis = var_1260, x = mh_w_15_cast_fp16)[name = string("op_1588_cast_fp16")]; + bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; + bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = value_heads_15_cast_fp16, y = var_1588_cast_fp16)[name = string("attn_7_cast_fp16")]; + tensor var_1593 = const()[name = string("op_1593"), val = tensor([1, -1, 1, 1])]; + tensor input_25_cast_fp16 = reshape(shape = var_1593, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")]; + string obj_35_pad_type_0 = const()[name = string("obj_35_pad_type_0"), val = string("valid")]; + tensor obj_35_strides_0 = const()[name = string("obj_35_strides_0"), val = tensor([1, 1])]; + tensor obj_35_pad_0 = const()[name = string("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_35_dilations_0 = const()[name = string("obj_35_dilations_0"), val = tensor([1, 1])]; + int32 obj_35_groups_0 = const()[name = string("obj_35_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51427584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53524800))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_35_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("obj_35_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_23_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_29_cast_fp16")]; + tensor inputs_sq_31_cast_fp16 = mul(x = inputs_29_cast_fp16, y = inputs_29_cast_fp16)[name = string("inputs_sq_31_cast_fp16")]; + tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([1])]; + bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = inputs_sq_31_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1612_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1611_to_fp16)[name = string("op_1612_cast_fp16")]; + fp32 var_1613_epsilon_0 = const()[name = string("op_1613_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1613_cast_fp16 = rsqrt(epsilon = var_1613_epsilon_0, x = var_1612_cast_fp16)[name = string("op_1613_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = mul(x = inputs_29_cast_fp16, y = var_1613_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor w_31_to_fp16 = const()[name = string("w_31_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53525376)))]; + tensor input_27_cast_fp16 = mul(x = w_31_to_fp16, y = hidden_states_37_cast_fp16)[name = string("input_27_cast_fp16")]; + string input_29_pad_type_0 = const()[name = string("input_29_pad_type_0"), val = string("valid")]; + tensor input_29_strides_0 = const()[name = string("input_29_strides_0"), val = tensor([1, 1])]; + tensor input_29_pad_0 = const()[name = string("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_29_dilations_0 = const()[name = string("input_29_dilations_0"), val = tensor([1, 1])]; + int32 input_29_groups_0 = const()[name = string("input_29_groups_0"), val = int32(1)]; + tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53527488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56673280))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_29_cast_fp16 = conv(dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_1627_cast_fp16 = silu(x = input_29_cast_fp16)[name = string("op_1627_cast_fp16")]; + string var_1633_pad_type_0 = const()[name = string("op_1633_pad_type_0"), val = string("valid")]; + tensor var_1633_strides_0 = const()[name = string("op_1633_strides_0"), val = tensor([1, 1])]; + tensor var_1633_pad_0 = const()[name = string("op_1633_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1633_dilations_0 = const()[name = string("op_1633_dilations_0"), val = tensor([1, 1])]; + int32 var_1633_groups_0 = const()[name = string("op_1633_groups_0"), val = int32(1)]; + tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56673856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59819648))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_1633_cast_fp16 = conv(dilations = var_1633_dilations_0, groups = var_1633_groups_0, pad = var_1633_pad_0, pad_type = var_1633_pad_type_0, strides = var_1633_strides_0, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_1633_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_1627_cast_fp16, y = var_1633_cast_fp16)[name = string("input_31_cast_fp16")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59820224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966016))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_39_cast_fp16 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("inputs_31_cast_fp16")]; + int32 var_1647 = const()[name = string("op_1647"), val = int32(3)]; + int32 var_1657 = const()[name = string("op_1657"), val = int32(-2)]; + int32 var_1665 = const()[name = string("op_1665"), val = int32(1)]; + tensor inputs_sq_33_cast_fp16 = mul(x = inputs_31_cast_fp16, y = inputs_31_cast_fp16)[name = string("inputs_sq_33_cast_fp16")]; + tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([1])]; + bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = inputs_sq_33_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1677_to_fp16 = const()[name = string("op_1677_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1678_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1677_to_fp16)[name = string("op_1678_cast_fp16")]; + fp32 var_1679_epsilon_0 = const()[name = string("op_1679_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1679_cast_fp16 = rsqrt(epsilon = var_1679_epsilon_0, x = var_1678_cast_fp16)[name = string("op_1679_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = mul(x = inputs_31_cast_fp16, y = var_1679_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor w_33_to_fp16 = const()[name = string("w_33_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966592)))]; + tensor obj_37_cast_fp16 = mul(x = w_33_to_fp16, y = hidden_states_41_cast_fp16)[name = string("obj_37_cast_fp16")]; + string query_25_pad_type_0 = const()[name = string("query_25_pad_type_0"), val = string("valid")]; + tensor query_25_strides_0 = const()[name = string("query_25_strides_0"), val = tensor([1, 1])]; + tensor query_25_pad_0 = const()[name = string("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_25_dilations_0 = const()[name = string("query_25_dilations_0"), val = tensor([1, 1])]; + int32 query_25_groups_0 = const()[name = string("query_25_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62968704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65065920))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; + tensor query_25_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_25_dilations_0, groups = query_25_groups_0, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = query_25_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("query_25_cast_fp16")]; + string current_key_17_pad_type_0 = const()[name = string("current_key_17_pad_type_0"), val = string("valid")]; + tensor current_key_17_strides_0 = const()[name = string("current_key_17_strides_0"), val = tensor([1, 1])]; + tensor current_key_17_pad_0 = const()[name = string("current_key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_17_dilations_0 = const()[name = string("current_key_17_dilations_0"), val = tensor([1, 1])]; + int32 current_key_17_groups_0 = const()[name = string("current_key_17_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65066496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66115136))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; + tensor current_key_17_cast_fp16 = conv(dilations = current_key_17_dilations_0, groups = current_key_17_groups_0, pad = current_key_17_pad_0, pad_type = current_key_17_pad_type_0, strides = current_key_17_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("current_key_17_cast_fp16")]; + string current_value_pad_type_0 = const()[name = string("current_value_pad_type_0"), val = string("valid")]; + tensor current_value_strides_0 = const()[name = string("current_value_strides_0"), val = tensor([1, 1])]; + tensor current_value_pad_0 = const()[name = string("current_value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_dilations_0 = const()[name = string("current_value_dilations_0"), val = tensor([1, 1])]; + int32 current_value_groups_0 = const()[name = string("current_value_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66115712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67164352))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; + tensor current_value_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_dilations_0, groups = current_value_groups_0, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = current_value_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("current_value_cast_fp16")]; + tensor var_1716 = const()[name = string("op_1716"), val = tensor([16, 128, 1, 1])]; + tensor inputs_33_cast_fp16 = reshape(shape = var_1716, x = query_25_cast_fp16)[name = string("inputs_33_cast_fp16")]; + tensor inputs_sq_35_cast_fp16 = mul(x = inputs_33_cast_fp16, y = inputs_33_cast_fp16)[name = string("inputs_sq_35_cast_fp16")]; + tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([1])]; + bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = inputs_sq_35_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1722_to_fp16 = const()[name = string("op_1722_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1723_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1722_to_fp16)[name = string("op_1723_cast_fp16")]; + fp32 var_1724_epsilon_0 = const()[name = string("op_1724_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1724_cast_fp16 = rsqrt(epsilon = var_1724_epsilon_0, x = var_1723_cast_fp16)[name = string("op_1724_cast_fp16")]; + tensor hidden_states_43_cast_fp16 = mul(x = inputs_33_cast_fp16, y = var_1724_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; + tensor w_35_to_fp16 = const()[name = string("w_35_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67164928)))]; + tensor query_normed_cast_fp16 = mul(x = w_35_to_fp16, y = hidden_states_43_cast_fp16)[name = string("query_normed_cast_fp16")]; + tensor var_1732 = const()[name = string("op_1732"), val = tensor([8, 128, 1, 1])]; + tensor inputs_35_cast_fp16 = reshape(shape = var_1732, x = current_key_17_cast_fp16)[name = string("inputs_35_cast_fp16")]; + tensor inputs_sq_37_cast_fp16 = mul(x = inputs_35_cast_fp16, y = inputs_35_cast_fp16)[name = string("inputs_sq_37_cast_fp16")]; + tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([1])]; + bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = inputs_sq_37_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1738_to_fp16 = const()[name = string("op_1738_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1739_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1738_to_fp16)[name = string("op_1739_cast_fp16")]; + fp32 var_1740_epsilon_0 = const()[name = string("op_1740_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1740_cast_fp16 = rsqrt(epsilon = var_1740_epsilon_0, x = var_1739_cast_fp16)[name = string("op_1740_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = mul(x = inputs_35_cast_fp16, y = var_1740_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor w_37_to_fp16 = const()[name = string("w_37_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67165248)))]; + tensor current_key_normed_cast_fp16 = mul(x = w_37_to_fp16, y = hidden_states_45_cast_fp16)[name = string("current_key_normed_cast_fp16")]; + tensor var_1758 = const()[name = string("op_1758"), val = tensor([1, 16, 128, -1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_1758, x = query_normed_cast_fp16)[name = string("mh_q_25_cast_fp16")]; + tensor var_1760 = const()[name = string("op_1760"), val = tensor([1, 8, 128, -1])]; + tensor mh_k_17_cast_fp16 = reshape(shape = var_1760, x = current_key_normed_cast_fp16)[name = string("mh_k_17_cast_fp16")]; + tensor var_1764_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1764_cast_fp16")]; + tensor var_1769_begin_0 = const()[name = string("op_1769_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1769_end_0 = const()[name = string("op_1769_end_0"), val = tensor([1, 16, 64, 1])]; + tensor var_1769_end_mask_0 = const()[name = string("op_1769_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1769_cast_fp16 = slice_by_index(begin = var_1769_begin_0, end = var_1769_end_0, end_mask = var_1769_end_mask_0, x = mh_q_25_cast_fp16)[name = string("op_1769_cast_fp16")]; + tensor var_1775_begin_0 = const()[name = string("op_1775_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1775_end_0 = const()[name = string("op_1775_end_0"), val = tensor([1, 16, 128, 1])]; + tensor var_1775_end_mask_0 = const()[name = string("op_1775_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1775_cast_fp16 = slice_by_index(begin = var_1775_begin_0, end = var_1775_end_0, end_mask = var_1775_end_mask_0, x = mh_q_25_cast_fp16)[name = string("op_1775_cast_fp16")]; + fp16 const_109_promoted_to_fp16 = const()[name = string("const_109_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1777_cast_fp16 = mul(x = var_1775_cast_fp16, y = const_109_promoted_to_fp16)[name = string("op_1777_cast_fp16")]; + bool var_1779_interleave_0 = const()[name = string("op_1779_interleave_0"), val = bool(false)]; + tensor var_1779_cast_fp16 = concat(axis = var_1657, interleave = var_1779_interleave_0, values = (var_1777_cast_fp16, var_1769_cast_fp16))[name = string("op_1779_cast_fp16")]; + tensor var_1780_cast_fp16 = mul(x = var_1779_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1780_cast_fp16")]; + tensor mh_q_27_cast_fp16 = add(x = var_1764_cast_fp16, y = var_1780_cast_fp16)[name = string("mh_q_27_cast_fp16")]; + tensor var_1782_cast_fp16 = mul(x = mh_k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1782_cast_fp16")]; + tensor var_1787_begin_0 = const()[name = string("op_1787_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1787_end_0 = const()[name = string("op_1787_end_0"), val = tensor([1, 8, 64, 1])]; + tensor var_1787_end_mask_0 = const()[name = string("op_1787_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = mh_k_17_cast_fp16)[name = string("op_1787_cast_fp16")]; + tensor var_1793_begin_0 = const()[name = string("op_1793_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1793_end_0 = const()[name = string("op_1793_end_0"), val = tensor([1, 8, 128, 1])]; + tensor var_1793_end_mask_0 = const()[name = string("op_1793_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1793_cast_fp16 = slice_by_index(begin = var_1793_begin_0, end = var_1793_end_0, end_mask = var_1793_end_mask_0, x = mh_k_17_cast_fp16)[name = string("op_1793_cast_fp16")]; + fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1795_cast_fp16 = mul(x = var_1793_cast_fp16, y = const_112_promoted_to_fp16)[name = string("op_1795_cast_fp16")]; + bool var_1797_interleave_0 = const()[name = string("op_1797_interleave_0"), val = bool(false)]; + tensor var_1797_cast_fp16 = concat(axis = var_1657, interleave = var_1797_interleave_0, values = (var_1795_cast_fp16, var_1787_cast_fp16))[name = string("op_1797_cast_fp16")]; + tensor var_1798_cast_fp16 = mul(x = var_1797_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1798_cast_fp16")]; + tensor mh_k_cast_fp16 = add(x = var_1782_cast_fp16, y = var_1798_cast_fp16)[name = string("mh_k_cast_fp16")]; + tensor var_1802 = const()[name = string("op_1802"), val = tensor([1, 1024, 1, 1])]; + tensor current_key_cast_fp16 = reshape(shape = var_1802, x = mh_k_cast_fp16)[name = string("current_key_cast_fp16")]; + tensor var_1809_cast_fp16 = mul(x = var_84_cast_fp16_4, y = var_260_cast_fp16)[name = string("op_1809_cast_fp16")]; + tensor var_1810_cast_fp16 = mul(x = current_key_cast_fp16, y = var_258_cast_fp16)[name = string("op_1810_cast_fp16")]; + tensor key_27_cast_fp16 = add(x = var_1809_cast_fp16, y = var_1810_cast_fp16)[name = string("key_27_cast_fp16")]; + tensor var_1813_cast_fp16 = mul(x = var_92_cast_fp16_4, y = var_260_cast_fp16)[name = string("op_1813_cast_fp16")]; + tensor var_1814_cast_fp16 = mul(x = current_value_cast_fp16, y = var_258_cast_fp16)[name = string("op_1814_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1813_cast_fp16, y = var_1814_cast_fp16)[name = string("value_17_cast_fp16")]; + tensor var_1818 = const()[name = string("op_1818"), val = tensor([1, 8, 128, 16])]; + tensor key_heads_17_cast_fp16 = reshape(shape = var_1818, x = key_27_cast_fp16)[name = string("key_heads_17_cast_fp16")]; + tensor var_1820 = const()[name = string("op_1820"), val = tensor([1, 8, 128, 16])]; + tensor value_heads_17_cast_fp16 = reshape(shape = var_1820, x = value_17_cast_fp16)[name = string("value_heads_17_cast_fp16")]; + tensor var_1823_begin_0 = const()[name = string("op_1823_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1823_end_0 = const()[name = string("op_1823_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1823_end_mask_0 = const()[name = string("op_1823_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1823_cast_fp16 = slice_by_index(begin = var_1823_begin_0, end = var_1823_end_0, end_mask = var_1823_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1823_cast_fp16")]; + tensor var_1827_begin_0 = const()[name = string("op_1827_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1827_end_0 = const()[name = string("op_1827_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1827_end_mask_0 = const()[name = string("op_1827_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1827_cast_fp16 = slice_by_index(begin = var_1827_begin_0, end = var_1827_end_0, end_mask = var_1827_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1827_cast_fp16")]; + tensor var_1839_begin_0 = const()[name = string("op_1839_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1839_end_0 = const()[name = string("op_1839_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1839_end_mask_0 = const()[name = string("op_1839_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1839_cast_fp16 = slice_by_index(begin = var_1839_begin_0, end = var_1839_end_0, end_mask = var_1839_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1839_cast_fp16")]; + tensor var_1843_begin_0 = const()[name = string("op_1843_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1843_end_0 = const()[name = string("op_1843_end_0"), val = tensor([1, 2, 128, 16])]; + tensor var_1843_end_mask_0 = const()[name = string("op_1843_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1843_cast_fp16 = slice_by_index(begin = var_1843_begin_0, end = var_1843_end_0, end_mask = var_1843_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1843_cast_fp16")]; + tensor var_1855_begin_0 = const()[name = string("op_1855_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1855_end_0 = const()[name = string("op_1855_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1855_end_mask_0 = const()[name = string("op_1855_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1855_cast_fp16 = slice_by_index(begin = var_1855_begin_0, end = var_1855_end_0, end_mask = var_1855_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1855_cast_fp16")]; + tensor var_1859_begin_0 = const()[name = string("op_1859_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1859_end_0 = const()[name = string("op_1859_end_0"), val = tensor([1, 3, 128, 16])]; + tensor var_1859_end_mask_0 = const()[name = string("op_1859_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1859_cast_fp16 = slice_by_index(begin = var_1859_begin_0, end = var_1859_end_0, end_mask = var_1859_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1859_cast_fp16")]; + tensor var_1871_begin_0 = const()[name = string("op_1871_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1871_end_0 = const()[name = string("op_1871_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1871_end_mask_0 = const()[name = string("op_1871_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1871_cast_fp16 = slice_by_index(begin = var_1871_begin_0, end = var_1871_end_0, end_mask = var_1871_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1871_cast_fp16")]; + tensor var_1875_begin_0 = const()[name = string("op_1875_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1875_end_0 = const()[name = string("op_1875_end_0"), val = tensor([1, 4, 128, 16])]; + tensor var_1875_end_mask_0 = const()[name = string("op_1875_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1875_cast_fp16 = slice_by_index(begin = var_1875_begin_0, end = var_1875_end_0, end_mask = var_1875_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1875_cast_fp16")]; + tensor var_1887_begin_0 = const()[name = string("op_1887_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1887_end_0 = const()[name = string("op_1887_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1887_end_mask_0 = const()[name = string("op_1887_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1887_cast_fp16 = slice_by_index(begin = var_1887_begin_0, end = var_1887_end_0, end_mask = var_1887_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1887_cast_fp16")]; + tensor var_1891_begin_0 = const()[name = string("op_1891_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1891_end_0 = const()[name = string("op_1891_end_0"), val = tensor([1, 5, 128, 16])]; + tensor var_1891_end_mask_0 = const()[name = string("op_1891_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1891_cast_fp16 = slice_by_index(begin = var_1891_begin_0, end = var_1891_end_0, end_mask = var_1891_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1891_cast_fp16")]; + tensor var_1903_begin_0 = const()[name = string("op_1903_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1903_end_0 = const()[name = string("op_1903_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1903_end_mask_0 = const()[name = string("op_1903_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1903_cast_fp16 = slice_by_index(begin = var_1903_begin_0, end = var_1903_end_0, end_mask = var_1903_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1903_cast_fp16")]; + tensor var_1907_begin_0 = const()[name = string("op_1907_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_1907_end_0 = const()[name = string("op_1907_end_0"), val = tensor([1, 6, 128, 16])]; + tensor var_1907_end_mask_0 = const()[name = string("op_1907_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1907_cast_fp16 = slice_by_index(begin = var_1907_begin_0, end = var_1907_end_0, end_mask = var_1907_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1907_cast_fp16")]; + tensor var_1919_begin_0 = const()[name = string("op_1919_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1919_end_0 = const()[name = string("op_1919_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1919_end_mask_0 = const()[name = string("op_1919_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1919_cast_fp16 = slice_by_index(begin = var_1919_begin_0, end = var_1919_end_0, end_mask = var_1919_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1919_cast_fp16")]; + tensor var_1923_begin_0 = const()[name = string("op_1923_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1923_end_0 = const()[name = string("op_1923_end_0"), val = tensor([1, 7, 128, 16])]; + tensor var_1923_end_mask_0 = const()[name = string("op_1923_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1923_cast_fp16 = slice_by_index(begin = var_1923_begin_0, end = var_1923_end_0, end_mask = var_1923_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1923_cast_fp16")]; + tensor var_1935_begin_0 = const()[name = string("op_1935_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1935_end_0 = const()[name = string("op_1935_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1935_end_mask_0 = const()[name = string("op_1935_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1935_cast_fp16 = slice_by_index(begin = var_1935_begin_0, end = var_1935_end_0, end_mask = var_1935_end_mask_0, x = key_heads_17_cast_fp16)[name = string("op_1935_cast_fp16")]; + tensor var_1939_begin_0 = const()[name = string("op_1939_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1939_end_0 = const()[name = string("op_1939_end_0"), val = tensor([1, 1, 128, 16])]; + tensor var_1939_end_mask_0 = const()[name = string("op_1939_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1939_cast_fp16 = slice_by_index(begin = var_1939_begin_0, end = var_1939_end_0, end_mask = var_1939_end_mask_0, x = value_heads_17_cast_fp16)[name = string("op_1939_cast_fp16")]; + bool key_heads_interleave_0 = const()[name = string("key_heads_interleave_0"), val = bool(false)]; + tensor key_heads_cast_fp16 = concat(axis = var_1665, interleave = key_heads_interleave_0, values = (var_1823_cast_fp16, var_1823_cast_fp16, var_1839_cast_fp16, var_1839_cast_fp16, var_1855_cast_fp16, var_1855_cast_fp16, var_1871_cast_fp16, var_1871_cast_fp16, var_1887_cast_fp16, var_1887_cast_fp16, var_1903_cast_fp16, var_1903_cast_fp16, var_1919_cast_fp16, var_1919_cast_fp16, var_1935_cast_fp16, var_1935_cast_fp16))[name = string("key_heads_cast_fp16")]; + bool value_heads_interleave_0 = const()[name = string("value_heads_interleave_0"), val = bool(false)]; + tensor value_heads_cast_fp16 = concat(axis = var_1665, interleave = value_heads_interleave_0, values = (var_1827_cast_fp16, var_1827_cast_fp16, var_1843_cast_fp16, var_1843_cast_fp16, var_1859_cast_fp16, var_1859_cast_fp16, var_1875_cast_fp16, var_1875_cast_fp16, var_1891_cast_fp16, var_1891_cast_fp16, var_1907_cast_fp16, var_1907_cast_fp16, var_1923_cast_fp16, var_1923_cast_fp16, var_1939_cast_fp16, var_1939_cast_fp16))[name = string("value_heads_cast_fp16")]; + fp16 var_1962_to_fp16 = const()[name = string("op_1962_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor var_1963_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1962_to_fp16)[name = string("op_1963_cast_fp16")]; + bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; + bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1963_cast_fp16, y = key_heads_cast_fp16)[name = string("mh_w_17_cast_fp16")]; + tensor mh_w_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_424_cast_fp16)[name = string("mh_w_cast_fp16")]; + tensor var_1975_cast_fp16 = softmax(axis = var_1647, x = mh_w_cast_fp16)[name = string("op_1975_cast_fp16")]; + bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; + bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = value_heads_cast_fp16, y = var_1975_cast_fp16)[name = string("attn_cast_fp16")]; + tensor var_1980 = const()[name = string("op_1980"), val = tensor([1, -1, 1, 1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1980, x = attn_cast_fp16)[name = string("input_33_cast_fp16")]; + string obj_pad_type_0 = const()[name = string("obj_pad_type_0"), val = string("valid")]; + tensor obj_strides_0 = const()[name = string("obj_strides_0"), val = tensor([1, 1])]; + tensor obj_pad_0 = const()[name = string("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_dilations_0 = const()[name = string("obj_dilations_0"), val = tensor([1, 1])]; + int32 obj_groups_0 = const()[name = string("obj_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67165568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69262784))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; + tensor obj_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("obj_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_cast_fp16)[name = string("inputs_37_cast_fp16")]; + tensor inputs_sq_39_cast_fp16 = mul(x = inputs_37_cast_fp16, y = inputs_37_cast_fp16)[name = string("inputs_sq_39_cast_fp16")]; + tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([1])]; + bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = inputs_sq_39_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1999_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1998_to_fp16)[name = string("op_1999_cast_fp16")]; + fp32 var_2000_epsilon_0 = const()[name = string("op_2000_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2000_cast_fp16 = rsqrt(epsilon = var_2000_epsilon_0, x = var_1999_cast_fp16)[name = string("op_2000_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = mul(x = inputs_37_cast_fp16, y = var_2000_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; + tensor w_39_to_fp16 = const()[name = string("w_39_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69263360)))]; + tensor input_35_cast_fp16 = mul(x = w_39_to_fp16, y = hidden_states_47_cast_fp16)[name = string("input_35_cast_fp16")]; + string input_37_pad_type_0 = const()[name = string("input_37_pad_type_0"), val = string("valid")]; + tensor input_37_strides_0 = const()[name = string("input_37_strides_0"), val = tensor([1, 1])]; + tensor input_37_pad_0 = const()[name = string("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_37_dilations_0 = const()[name = string("input_37_dilations_0"), val = tensor([1, 1])]; + int32 input_37_groups_0 = const()[name = string("input_37_groups_0"), val = int32(1)]; + tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69265472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72411264))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; + tensor input_37_cast_fp16 = conv(dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")]; + tensor var_2014_cast_fp16 = silu(x = input_37_cast_fp16)[name = string("op_2014_cast_fp16")]; + string var_2020_pad_type_0 = const()[name = string("op_2020_pad_type_0"), val = string("valid")]; + tensor var_2020_strides_0 = const()[name = string("op_2020_strides_0"), val = tensor([1, 1])]; + tensor var_2020_pad_0 = const()[name = string("op_2020_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2020_dilations_0 = const()[name = string("op_2020_dilations_0"), val = tensor([1, 1])]; + int32 var_2020_groups_0 = const()[name = string("op_2020_groups_0"), val = int32(1)]; + tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72411840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75557632))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; + tensor var_2020_cast_fp16 = conv(dilations = var_2020_dilations_0, groups = var_2020_groups_0, pad = var_2020_pad_0, pad_type = var_2020_pad_type_0, strides = var_2020_strides_0, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_2020_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_2014_cast_fp16, y = var_2020_cast_fp16)[name = string("input_39_cast_fp16")]; + string hidden_states_49_pad_type_0 = const()[name = string("hidden_states_49_pad_type_0"), val = string("valid")]; + tensor hidden_states_49_strides_0 = const()[name = string("hidden_states_49_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_49_pad_0 = const()[name = string("hidden_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_49_dilations_0 = const()[name = string("hidden_states_49_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_49_groups_0 = const()[name = string("hidden_states_49_groups_0"), val = int32(1)]; + tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75558208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704000))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; + tensor hidden_states_49_cast_fp16 = conv(dilations = hidden_states_49_dilations_0, groups = hidden_states_49_groups_0, pad = hidden_states_49_pad_0, pad_type = hidden_states_49_pad_type_0, strides = hidden_states_49_strides_0, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_37_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("inputs_cast_fp16")]; + tensor inputs_sq_cast_fp16 = mul(x = inputs_cast_fp16, y = inputs_cast_fp16)[name = string("inputs_sq_cast_fp16")]; + tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([1])]; + bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = inputs_sq_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_2041_to_fp16 = const()[name = string("op_2041_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2042_cast_fp16 = add(x = variance_cast_fp16, y = var_2041_to_fp16)[name = string("op_2042_cast_fp16")]; + fp32 var_2043_epsilon_0 = const()[name = string("op_2043_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2043_cast_fp16 = rsqrt(epsilon = var_2043_epsilon_0, x = var_2042_cast_fp16)[name = string("op_2043_cast_fp16")]; + tensor hidden_states_cast_fp16 = mul(x = inputs_cast_fp16, y = var_2043_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor w_to_fp16 = const()[name = string("w_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704576)))]; + tensor input_cast_fp16 = mul(x = w_to_fp16, y = hidden_states_cast_fp16)[name = string("input_cast_fp16")]; + string logits_1_pad_type_0 = const()[name = string("logits_1_pad_type_0"), val = string("valid")]; + tensor logits_1_strides_0 = const()[name = string("logits_1_strides_0"), val = tensor([1, 1])]; + tensor logits_1_pad_0 = const()[name = string("logits_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_1_dilations_0 = const()[name = string("logits_1_dilations_0"), val = tensor([1, 1])]; + int32 logits_1_groups_0 = const()[name = string("logits_1_groups_0"), val = int32(1)]; + tensor lm_heads_0_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78706688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80803904))))[name = string("lm_heads_0_weight_to_fp16_palettized")]; + tensor logits_1_cast_fp16 = conv(dilations = logits_1_dilations_0, groups = logits_1_groups_0, pad = logits_1_pad_0, pad_type = logits_1_pad_type_0, strides = logits_1_strides_0, weight = lm_heads_0_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_1_cast_fp16")]; + tensor var_2060_axes_0 = const()[name = string("op_2060_axes_0"), val = tensor([3])]; + tensor var_2060_cast_fp16 = squeeze(axes = var_2060_axes_0, x = logits_1_cast_fp16)[name = string("op_2060_cast_fp16")]; + string logits_3_pad_type_0 = const()[name = string("logits_3_pad_type_0"), val = string("valid")]; + tensor logits_3_strides_0 = const()[name = string("logits_3_strides_0"), val = tensor([1, 1])]; + tensor logits_3_pad_0 = const()[name = string("logits_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_3_dilations_0 = const()[name = string("logits_3_dilations_0"), val = tensor([1, 1])]; + int32 logits_3_groups_0 = const()[name = string("logits_3_groups_0"), val = int32(1)]; + tensor lm_heads_1_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80804480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901696))))[name = string("lm_heads_1_weight_to_fp16_palettized")]; + tensor logits_3_cast_fp16 = conv(dilations = logits_3_dilations_0, groups = logits_3_groups_0, pad = logits_3_pad_0, pad_type = logits_3_pad_type_0, strides = logits_3_strides_0, weight = lm_heads_1_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_3_cast_fp16")]; + tensor var_2076_axes_0 = const()[name = string("op_2076_axes_0"), val = tensor([3])]; + tensor var_2076_cast_fp16 = squeeze(axes = var_2076_axes_0, x = logits_3_cast_fp16)[name = string("op_2076_cast_fp16")]; + string logits_5_pad_type_0 = const()[name = string("logits_5_pad_type_0"), val = string("valid")]; + tensor logits_5_strides_0 = const()[name = string("logits_5_strides_0"), val = tensor([1, 1])]; + tensor logits_5_pad_0 = const()[name = string("logits_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_5_dilations_0 = const()[name = string("logits_5_dilations_0"), val = tensor([1, 1])]; + int32 logits_5_groups_0 = const()[name = string("logits_5_groups_0"), val = int32(1)]; + tensor lm_heads_2_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82902272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84999488))))[name = string("lm_heads_2_weight_to_fp16_palettized")]; + tensor logits_5_cast_fp16 = conv(dilations = logits_5_dilations_0, groups = logits_5_groups_0, pad = logits_5_pad_0, pad_type = logits_5_pad_type_0, strides = logits_5_strides_0, weight = lm_heads_2_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_5_cast_fp16")]; + tensor var_2092_axes_0 = const()[name = string("op_2092_axes_0"), val = tensor([3])]; + tensor var_2092_cast_fp16 = squeeze(axes = var_2092_axes_0, x = logits_5_cast_fp16)[name = string("op_2092_cast_fp16")]; + string logits_7_pad_type_0 = const()[name = string("logits_7_pad_type_0"), val = string("valid")]; + tensor logits_7_strides_0 = const()[name = string("logits_7_strides_0"), val = tensor([1, 1])]; + tensor logits_7_pad_0 = const()[name = string("logits_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_7_dilations_0 = const()[name = string("logits_7_dilations_0"), val = tensor([1, 1])]; + int32 logits_7_groups_0 = const()[name = string("logits_7_groups_0"), val = int32(1)]; + tensor lm_heads_3_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85000064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87097280))))[name = string("lm_heads_3_weight_to_fp16_palettized")]; + tensor logits_7_cast_fp16 = conv(dilations = logits_7_dilations_0, groups = logits_7_groups_0, pad = logits_7_pad_0, pad_type = logits_7_pad_type_0, strides = logits_7_strides_0, weight = lm_heads_3_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_7_cast_fp16")]; + tensor var_2108_axes_0 = const()[name = string("op_2108_axes_0"), val = tensor([3])]; + tensor var_2108_cast_fp16 = squeeze(axes = var_2108_axes_0, x = logits_7_cast_fp16)[name = string("op_2108_cast_fp16")]; + string logits_9_pad_type_0 = const()[name = string("logits_9_pad_type_0"), val = string("valid")]; + tensor logits_9_strides_0 = const()[name = string("logits_9_strides_0"), val = tensor([1, 1])]; + tensor logits_9_pad_0 = const()[name = string("logits_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_9_dilations_0 = const()[name = string("logits_9_dilations_0"), val = tensor([1, 1])]; + int32 logits_9_groups_0 = const()[name = string("logits_9_groups_0"), val = int32(1)]; + tensor lm_heads_4_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87097856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89195072))))[name = string("lm_heads_4_weight_to_fp16_palettized")]; + tensor logits_9_cast_fp16 = conv(dilations = logits_9_dilations_0, groups = logits_9_groups_0, pad = logits_9_pad_0, pad_type = logits_9_pad_type_0, strides = logits_9_strides_0, weight = lm_heads_4_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_9_cast_fp16")]; + tensor var_2124_axes_0 = const()[name = string("op_2124_axes_0"), val = tensor([3])]; + tensor var_2124_cast_fp16 = squeeze(axes = var_2124_axes_0, x = logits_9_cast_fp16)[name = string("op_2124_cast_fp16")]; + string logits_11_pad_type_0 = const()[name = string("logits_11_pad_type_0"), val = string("valid")]; + tensor logits_11_strides_0 = const()[name = string("logits_11_strides_0"), val = tensor([1, 1])]; + tensor logits_11_pad_0 = const()[name = string("logits_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_11_dilations_0 = const()[name = string("logits_11_dilations_0"), val = tensor([1, 1])]; + int32 logits_11_groups_0 = const()[name = string("logits_11_groups_0"), val = int32(1)]; + tensor lm_heads_5_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89195648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91292864))))[name = string("lm_heads_5_weight_to_fp16_palettized")]; + tensor logits_11_cast_fp16 = conv(dilations = logits_11_dilations_0, groups = logits_11_groups_0, pad = logits_11_pad_0, pad_type = logits_11_pad_type_0, strides = logits_11_strides_0, weight = lm_heads_5_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_11_cast_fp16")]; + tensor var_2140_axes_0 = const()[name = string("op_2140_axes_0"), val = tensor([3])]; + tensor var_2140_cast_fp16 = squeeze(axes = var_2140_axes_0, x = logits_11_cast_fp16)[name = string("op_2140_cast_fp16")]; + string logits_13_pad_type_0 = const()[name = string("logits_13_pad_type_0"), val = string("valid")]; + tensor logits_13_strides_0 = const()[name = string("logits_13_strides_0"), val = tensor([1, 1])]; + tensor logits_13_pad_0 = const()[name = string("logits_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_13_dilations_0 = const()[name = string("logits_13_dilations_0"), val = tensor([1, 1])]; + int32 logits_13_groups_0 = const()[name = string("logits_13_groups_0"), val = int32(1)]; + tensor lm_heads_6_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91293440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93390656))))[name = string("lm_heads_6_weight_to_fp16_palettized")]; + tensor logits_13_cast_fp16 = conv(dilations = logits_13_dilations_0, groups = logits_13_groups_0, pad = logits_13_pad_0, pad_type = logits_13_pad_type_0, strides = logits_13_strides_0, weight = lm_heads_6_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_13_cast_fp16")]; + tensor var_2156_axes_0 = const()[name = string("op_2156_axes_0"), val = tensor([3])]; + tensor var_2156_cast_fp16 = squeeze(axes = var_2156_axes_0, x = logits_13_cast_fp16)[name = string("op_2156_cast_fp16")]; + string logits_15_pad_type_0 = const()[name = string("logits_15_pad_type_0"), val = string("valid")]; + tensor logits_15_strides_0 = const()[name = string("logits_15_strides_0"), val = tensor([1, 1])]; + tensor logits_15_pad_0 = const()[name = string("logits_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_15_dilations_0 = const()[name = string("logits_15_dilations_0"), val = tensor([1, 1])]; + int32 logits_15_groups_0 = const()[name = string("logits_15_groups_0"), val = int32(1)]; + tensor lm_heads_7_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93391232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95488448))))[name = string("lm_heads_7_weight_to_fp16_palettized")]; + tensor logits_15_cast_fp16 = conv(dilations = logits_15_dilations_0, groups = logits_15_groups_0, pad = logits_15_pad_0, pad_type = logits_15_pad_type_0, strides = logits_15_strides_0, weight = lm_heads_7_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_15_cast_fp16")]; + tensor var_2172_axes_0 = const()[name = string("op_2172_axes_0"), val = tensor([3])]; + tensor var_2172_cast_fp16 = squeeze(axes = var_2172_axes_0, x = logits_15_cast_fp16)[name = string("op_2172_cast_fp16")]; + string logits_17_pad_type_0 = const()[name = string("logits_17_pad_type_0"), val = string("valid")]; + tensor logits_17_strides_0 = const()[name = string("logits_17_strides_0"), val = tensor([1, 1])]; + tensor logits_17_pad_0 = const()[name = string("logits_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_17_dilations_0 = const()[name = string("logits_17_dilations_0"), val = tensor([1, 1])]; + int32 logits_17_groups_0 = const()[name = string("logits_17_groups_0"), val = int32(1)]; + tensor lm_heads_8_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95489024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97586240))))[name = string("lm_heads_8_weight_to_fp16_palettized")]; + tensor logits_17_cast_fp16 = conv(dilations = logits_17_dilations_0, groups = logits_17_groups_0, pad = logits_17_pad_0, pad_type = logits_17_pad_type_0, strides = logits_17_strides_0, weight = lm_heads_8_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_17_cast_fp16")]; + tensor var_2188_axes_0 = const()[name = string("op_2188_axes_0"), val = tensor([3])]; + tensor var_2188_cast_fp16 = squeeze(axes = var_2188_axes_0, x = logits_17_cast_fp16)[name = string("op_2188_cast_fp16")]; + string logits_19_pad_type_0 = const()[name = string("logits_19_pad_type_0"), val = string("valid")]; + tensor logits_19_strides_0 = const()[name = string("logits_19_strides_0"), val = tensor([1, 1])]; + tensor logits_19_pad_0 = const()[name = string("logits_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_19_dilations_0 = const()[name = string("logits_19_dilations_0"), val = tensor([1, 1])]; + int32 logits_19_groups_0 = const()[name = string("logits_19_groups_0"), val = int32(1)]; + tensor lm_heads_9_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97586816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99684032))))[name = string("lm_heads_9_weight_to_fp16_palettized")]; + tensor logits_19_cast_fp16 = conv(dilations = logits_19_dilations_0, groups = logits_19_groups_0, pad = logits_19_pad_0, pad_type = logits_19_pad_type_0, strides = logits_19_strides_0, weight = lm_heads_9_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_19_cast_fp16")]; + tensor var_2204_axes_0 = const()[name = string("op_2204_axes_0"), val = tensor([3])]; + tensor var_2204_cast_fp16 = squeeze(axes = var_2204_axes_0, x = logits_19_cast_fp16)[name = string("op_2204_cast_fp16")]; + string logits_21_pad_type_0 = const()[name = string("logits_21_pad_type_0"), val = string("valid")]; + tensor logits_21_strides_0 = const()[name = string("logits_21_strides_0"), val = tensor([1, 1])]; + tensor logits_21_pad_0 = const()[name = string("logits_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_21_dilations_0 = const()[name = string("logits_21_dilations_0"), val = tensor([1, 1])]; + int32 logits_21_groups_0 = const()[name = string("logits_21_groups_0"), val = int32(1)]; + tensor lm_heads_10_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99684608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101781824))))[name = string("lm_heads_10_weight_to_fp16_palettized")]; + tensor logits_21_cast_fp16 = conv(dilations = logits_21_dilations_0, groups = logits_21_groups_0, pad = logits_21_pad_0, pad_type = logits_21_pad_type_0, strides = logits_21_strides_0, weight = lm_heads_10_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_21_cast_fp16")]; + tensor var_2220_axes_0 = const()[name = string("op_2220_axes_0"), val = tensor([3])]; + tensor var_2220_cast_fp16 = squeeze(axes = var_2220_axes_0, x = logits_21_cast_fp16)[name = string("op_2220_cast_fp16")]; + string logits_23_pad_type_0 = const()[name = string("logits_23_pad_type_0"), val = string("valid")]; + tensor logits_23_strides_0 = const()[name = string("logits_23_strides_0"), val = tensor([1, 1])]; + tensor logits_23_pad_0 = const()[name = string("logits_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_23_dilations_0 = const()[name = string("logits_23_dilations_0"), val = tensor([1, 1])]; + int32 logits_23_groups_0 = const()[name = string("logits_23_groups_0"), val = int32(1)]; + tensor lm_heads_11_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101782400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103879616))))[name = string("lm_heads_11_weight_to_fp16_palettized")]; + tensor logits_23_cast_fp16 = conv(dilations = logits_23_dilations_0, groups = logits_23_groups_0, pad = logits_23_pad_0, pad_type = logits_23_pad_type_0, strides = logits_23_strides_0, weight = lm_heads_11_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_23_cast_fp16")]; + tensor var_2236_axes_0 = const()[name = string("op_2236_axes_0"), val = tensor([3])]; + tensor var_2236_cast_fp16 = squeeze(axes = var_2236_axes_0, x = logits_23_cast_fp16)[name = string("op_2236_cast_fp16")]; + string logits_25_pad_type_0 = const()[name = string("logits_25_pad_type_0"), val = string("valid")]; + tensor logits_25_strides_0 = const()[name = string("logits_25_strides_0"), val = tensor([1, 1])]; + tensor logits_25_pad_0 = const()[name = string("logits_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_25_dilations_0 = const()[name = string("logits_25_dilations_0"), val = tensor([1, 1])]; + int32 logits_25_groups_0 = const()[name = string("logits_25_groups_0"), val = int32(1)]; + tensor lm_heads_12_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105977408))))[name = string("lm_heads_12_weight_to_fp16_palettized")]; + tensor logits_25_cast_fp16 = conv(dilations = logits_25_dilations_0, groups = logits_25_groups_0, pad = logits_25_pad_0, pad_type = logits_25_pad_type_0, strides = logits_25_strides_0, weight = lm_heads_12_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_25_cast_fp16")]; + tensor var_2252_axes_0 = const()[name = string("op_2252_axes_0"), val = tensor([3])]; + tensor var_2252_cast_fp16 = squeeze(axes = var_2252_axes_0, x = logits_25_cast_fp16)[name = string("op_2252_cast_fp16")]; + string logits_27_pad_type_0 = const()[name = string("logits_27_pad_type_0"), val = string("valid")]; + tensor logits_27_strides_0 = const()[name = string("logits_27_strides_0"), val = tensor([1, 1])]; + tensor logits_27_pad_0 = const()[name = string("logits_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_27_dilations_0 = const()[name = string("logits_27_dilations_0"), val = tensor([1, 1])]; + int32 logits_27_groups_0 = const()[name = string("logits_27_groups_0"), val = int32(1)]; + tensor lm_heads_13_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105977984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108075200))))[name = string("lm_heads_13_weight_to_fp16_palettized")]; + tensor logits_27_cast_fp16 = conv(dilations = logits_27_dilations_0, groups = logits_27_groups_0, pad = logits_27_pad_0, pad_type = logits_27_pad_type_0, strides = logits_27_strides_0, weight = lm_heads_13_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_27_cast_fp16")]; + tensor var_2268_axes_0 = const()[name = string("op_2268_axes_0"), val = tensor([3])]; + tensor var_2268_cast_fp16 = squeeze(axes = var_2268_axes_0, x = logits_27_cast_fp16)[name = string("op_2268_cast_fp16")]; + string logits_29_pad_type_0 = const()[name = string("logits_29_pad_type_0"), val = string("valid")]; + tensor logits_29_strides_0 = const()[name = string("logits_29_strides_0"), val = tensor([1, 1])]; + tensor logits_29_pad_0 = const()[name = string("logits_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor logits_29_dilations_0 = const()[name = string("logits_29_dilations_0"), val = tensor([1, 1])]; + int32 logits_29_groups_0 = const()[name = string("logits_29_groups_0"), val = int32(1)]; + tensor lm_heads_14_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108075776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110172992))))[name = string("lm_heads_14_weight_to_fp16_palettized")]; + tensor logits_29_cast_fp16 = conv(dilations = logits_29_dilations_0, groups = logits_29_groups_0, pad = logits_29_pad_0, pad_type = logits_29_pad_type_0, strides = logits_29_strides_0, weight = lm_heads_14_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("logits_29_cast_fp16")]; + tensor var_2284_axes_0 = const()[name = string("op_2284_axes_0"), val = tensor([3])]; + tensor var_2284_cast_fp16 = squeeze(axes = var_2284_axes_0, x = logits_29_cast_fp16)[name = string("op_2284_cast_fp16")]; + bool var_2290_interleave_0 = const()[name = string("op_2290_interleave_0"), val = bool(false)]; + int32 const_119 = const()[name = string("const_119"), val = int32(2)]; + tensor var_2290_cast_fp16 = concat(axis = const_119, interleave = var_2290_interleave_0, values = (var_2060_cast_fp16, var_2076_cast_fp16, var_2092_cast_fp16, var_2108_cast_fp16, var_2124_cast_fp16, var_2140_cast_fp16, var_2156_cast_fp16, var_2172_cast_fp16, var_2188_cast_fp16, var_2204_cast_fp16, var_2220_cast_fp16, var_2236_cast_fp16, var_2252_cast_fp16, var_2268_cast_fp16, var_2284_cast_fp16))[name = string("op_2290_cast_fp16")]; + int32 var_2292 = const()[name = string("op_2292"), val = int32(1)]; + bool var_2293_interleave_0 = const()[name = string("op_2293_interleave_0"), val = bool(false)]; + tensor key_cache_updates = concat(axis = var_2292, interleave = var_2293_interleave_0, values = (current_key_3_cast_fp16, current_key_7_cast_fp16, current_key_11_cast_fp16, current_key_15_cast_fp16, current_key_cast_fp16))[name = string("op_2293_cast_fp16")]; + int32 var_2295 = const()[name = string("op_2295"), val = int32(1)]; + bool var_2296_interleave_0 = const()[name = string("op_2296_interleave_0"), val = bool(false)]; + tensor value_cache_updates = concat(axis = var_2295, interleave = var_2296_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_cast_fp16))[name = string("op_2296_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([0, 2, 1])]; + tensor all_logits = transpose(perm = transpose_0_perm_0, x = var_2290_cast_fp16)[name = string("transpose_0")]; + } -> (all_logits, key_cache_updates, value_cache_updates); +} \ No newline at end of file